From 3f616366bdd2824d4e1ceec8524e99d35e91f841 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Sat, 17 Oct 2015 15:57:04 +0200 Subject: [PATCH 01/50] Prepared the changelog for the next release --- CHANGELOG | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 29d666bb..15fe8a88 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,7 @@ +Development version (next release) +- + Version 0.5.0 - Improved structure and performance of level-2 routines (xSYMV/xHEMV) - Reduced compilation time of level-3 OpenCL kernels From 9bf6be8426fdf93bf22530eb9379c653d9285d1a Mon Sep 17 00:00:00 2001 From: CNugteren Date: Fri, 23 Oct 2015 11:01:44 +0200 Subject: [PATCH 02/50] Added alpha and beta to tuner meta-data --- include/internal/tuning.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/internal/tuning.h b/include/internal/tuning.h index 6ea530ba..5645a5e5 100644 --- a/include/internal/tuning.h +++ b/include/internal/tuning.h @@ -127,9 +127,11 @@ void Tuner(int argc, char* argv[]) { {"precision", precision_string} }; for (auto &o: C::GetOptions()) { - if (o == kArgM) { metadata.push_back({"arg_m", std::to_string(args.m)}); } - if (o == kArgN) { metadata.push_back({"arg_n", std::to_string(args.n)}); } - if (o == kArgK) { metadata.push_back({"arg_k", std::to_string(args.k)}); } + if (o == kArgM) { metadata.push_back({"arg_m", std::to_string(args.m)}); } + if (o == kArgN) { metadata.push_back({"arg_n", std::to_string(args.n)}); } + if (o == kArgK) { metadata.push_back({"arg_k", std::to_string(args.k)}); } + if (o == kArgAlpha) { metadata.push_back({"arg_alpha", ToString(args.alpha)}); } + if (o == kArgBeta) { metadata.push_back({"arg_beta", ToString(args.beta)}); } } tuner.PrintJSON("clblast_"+C::KernelFamily()+"_"+precision_string+".json", metadata); } From a2d5d7770e01763cb26d050cf4ef3fe5f4fcec83 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Sun, 25 Oct 2015 16:27:14 +0100 Subject: [PATCH 03/50] Moved the tuner database 
script to a separate folder --- {src => scripts/database}/database.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {src => scripts/database}/database.py (100%) diff --git a/src/database.py b/scripts/database/database.py similarity index 100% rename from src/database.py rename to scripts/database/database.py From 179ad0666ddcccb5c49caa88d9debd53004a3439 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Sun, 25 Oct 2015 16:48:26 +0100 Subject: [PATCH 04/50] Fixed an arguments-related bug in the GEMV tuner --- src/tuning/xgemv.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tuning/xgemv.cc b/src/tuning/xgemv.cc index 6a066518..675f0bf9 100644 --- a/src/tuning/xgemv.cc +++ b/src/tuning/xgemv.cc @@ -108,6 +108,9 @@ class TuneXgemv { tuner.AddArgumentScalar(0); tuner.AddArgumentScalar(1); tuner.AddArgumentScalar(0); // Conjugate transpose + tuner.AddArgumentScalar(0); // Additional parameter + tuner.AddArgumentScalar(0); // Banded 'kl' + tuner.AddArgumentScalar(0); // Banded 'ku' } // Describes how to compute the performance metrics From ccd1a5c7cc5cce238600b95fcd2d6c10338a65a7 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Sun, 25 Oct 2015 16:49:29 +0100 Subject: [PATCH 05/50] Updated tuning database script according to the new JSON format --- scripts/database/database.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/database/database.py b/scripts/database/database.py index 2852b54c..4ce26277 100644 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -20,7 +20,9 @@ import json import pandas as pd # Constants -ATTRIBUTES = ["device", "type", "vendor", "precision", "kernel_family", "arg_m", "arg_n", "arg_k"] +ATTRIBUTES = ["device", "device_vendor", "device_type", "device_core_clock", "device_compute_units", + "precision", "kernel_family", + "arg_m", "arg_n", "arg_k", "arg_alpha", "arg_beta"] # Pandas options pd.set_option('display.width', 1000) @@ -125,8 +127,8 @@ def PrintData(df): # 
Loops over the different entries for this family and prints their headers for precision, dfprecision in dffamily.groupby(["precision"]): f.write(GetPrecision(family, precision)) - for vendor, dfvendor in dfprecision.groupby(["vendor"]): - for devtype, dfdevtype in dfvendor.groupby(["type"]): + for vendor, dfvendor in dfprecision.groupby(["device_vendor"]): + for devtype, dfdevtype in dfvendor.groupby(["device_type"]): f.write(GetDeviceVendor(vendor, devtype)) for device, dfdevice in dfdevtype.groupby(["device"]): devicename = "\"%s\"," % device @@ -158,7 +160,7 @@ if len(sys.argv) != 3: # Parses the command-line arguments path_json = sys.argv[1] path_clblast = sys.argv[2] -file_db = path_clblast+"/src/database.db" +file_db = path_clblast+"/scripts/database/database.db" glob_json = path_json+"/*.json" # Checks whether the command-line arguments are valid; exists otherwise From bb4e78f7371f6c1ab56692d25ea06ae32634d8c1 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Sun, 25 Oct 2015 16:49:59 +0100 Subject: [PATCH 06/50] Added initial tuning database with Intel Iris data --- scripts/database/database.db | Bin 0 -> 581880 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 scripts/database/database.db diff --git a/scripts/database/database.db b/scripts/database/database.db new file mode 100644 index 0000000000000000000000000000000000000000..284369902f895180d0899149be37e99b5ca26bea GIT binary patch literal 581880 zcmeF)1)Nn?_y6%Bl^9wSrMpB@K-7C^5D9@1FlZE%8bZY`KvWb(#P06y?(S|$l}?oy z{&Vl%Ynad4zw;k>Kv7x#Ua#MJ-s|jh&b|BY^F8Oz%;R%*{$b;Xj2%8?VxwW>CX8rw z+=L-xMilKZWYUmMssEIjuiiPC95QO`q!AOw4jDbMXuHwlhMm}b$k-vrkC;$2vq1L9 zf_0D0EL0aq95r_Mh*OFV8av|TL0wb-F|+XhvE6Ult+-jo_+qzBE!Z`9y5N5 zdtGBDkDfGY$b<<)rW74CVZ^X;V<%3UFnQRdqM1eAsG`|#A+7IS^wwcv~Ui-`%*t2`~v1(#ekL*3$W!4IM?K5j*&mP%(*2&l_*ExH&?a;S(+dTvw6@>PE89; zO>%6XE*YIVWlzn4!T997w8TrwUL&pWF&x>iX=ZEHX_k4A>U7U+qdGk@4@Re7=RTQ- z;GJc6nq(fTTTL?)y45T*;-Ru%bz2VV)wfAzJKbuU*CYGCn(5MoklvAc)g4)6WYLjZ 
z!>C(!7j7V)*Huf=>V?rXZQC44REYbjq#`&!1=vc8t{buVAb`?|NU`}kVH*L{86 z&)15+R`Rv7uT^}l>T5M$tNSWbQ`5VaueE)x<7-`C>-k#W*9N{e^mTt<8~J*GuZ?|e z;%if1oB7(@*A~7W=xa+~Tlw1B*Mod*!uV2w#u$^>|-L`Z~(j6MQ|<*U`R?@pY`P<9r?OYo@Og ze4Xg)Bwr`{dXldv`+AD6Q+z$u*VBAG-PbdGJ=52-e3fF?znclW{@py-_3!4vu75WV zcK>(tVE2DF4|4x^N=vi*hnoqzf4F(r*89huJ!ajfd{F5`VyF5?sZtHz_d%yPcX7`EP&zs#m4E7hm5MTePKX3Lg zZ@0-|yH4FFZr7=shr#{=uXpFXdmrzxDUmHh*pg{@0&(*!be#M_P_Otu2zpw6%~v&Y zAFI_}GY^gZ{9ASN|98(r%i85tpmFbEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zH zc2y3@?~!*^-PONOmfvIUs=BM@UCwLs@9sY`AAfh<)${*%zh8a6UH$%}$bJ61s_Oap zyKVEodtd*%>bb+t>(-skgUv&725cSJIC1WIs+=dT%6a0d zoF}fvomcvC=aqikd8HqBUg^tu;;Nh{uF84hs+=dT#+_ICap#qO+F=uN4m+=X@3HSa_Pxh)z;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKH zz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKH z;IGMn#*sU&)F{TfFXN@^?Qzz9y}#ZbXX)#zkC&JA{&AM$NR@G7UG;Xc)c@QW0 zBbF+CsnXA@#*LT$w$`}w$o_h}?&oE_zgX`leW|its`TS(+&Jk=mHqRw9;e64cB!&m zSG`>@QW0BbF+CsnXA@ z#*LT$w$`}w$o_h}?&oE_zgX`leW|its`TS(+&Jk=mHqRw9;e64cB!)6o>Tss=M<^{*PQkGr!iuYX%>-1)b4e?3l)Cw;N5+q+$l&&&FJ+Hw0! 
ze|xJOU-pahw)W*XQe{7>(w8dzxGLkM>Tz*>Jx*Kp(^bYvUt9W8WxK9=yIAU9HC`Wg zXIozXw$`}wZ|nYgoE%U3VqLd)yB?pH_4%~p_Lu(lRyn@x7w2v5%WF0HP87EaAM_b0paiz*Qu~ZqatBjW_h3^%KlQNFUJwfcB#^rD&uvP@zR&^y2^GvUiwmHyVRY1f41YLmY@Bd z#91~E$r-Tg!PbGT1G^3^2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H z2P_Bv>p395hs>+W?HTFtsj@waTz-kA%DB9$9xv84+DYfx z*5k@JdHrIkk$*k1U8-!4v-D+usj{C`87G!1{Yaf(wo8?9;<&o4<7EFRkIzBINtNU4 zD#wu;H(vTuWxO0u`ckDYRrkeGqdewO#_1~i>3*DLynbDAbZ8>{)<*8Ss-C&$&>^?78RR2eVl7t40pUo2I| z$$nCGUt905EyvM)Z5b!WZ|vtYuW_=UR6SmfCw-~Xmn!>~O87Ect&#UU=h-E*q99OFJr8ZXc8@KdW@Hb}U{o)+gm*Ys) zab!Q)F5{&yj@vGM8821#(^bYxU&hB(8K?Ktec3Kmj;Aes zUE_`?b6j1x9mE*{Ev92;cuF815UHVdGdz>RR4>Dd?*mA;JEec3Km#>G|HU$)DB zaaE5KOO^e!*n?mF;nk)I7*|U1fh6 zFID<7UiW3YR2e6AXWyUgc<%mV^I-FkoB>-0whn9^*mYnzU^!qpU^!qpU^!qpU^!qp zU^!qpU^!qpU^!qpU^!qpU^!qpU^(z#&w;^_yUu(0DwZn0m(<%MKVDZoPL3nv^mcJx zRgR;p-Y>81GCuD0=>27!RN23=djGPYY?mtI#8PGawpQ6M?sdpJx=<% z>TyxbdXd*D`-`PYUsu`QSRGgTQsum2Iga$jy6Wv>IgV7>PpoU)b~&DG*Hw-a*Vo&{ z`nb~9$7$^6O>dX|b=BkKcrs3J7w1*wIJ)Zn^4c!r<6e*6U&cw5{TthLnwI=W&zH=P z^u{I%gu9%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&) z%K^&)%K^&)%K^&)%K^&)%Yj{=1M=^sk(w5{bt$!!w^aT2HuCp6vcFW>U#jdcRgNR$ zrOJ4zGG40eCsp>7D*H*5{iIs%xX;(-!R8@31GWxq9oRas>%elra=>!Ha=>!Ha=>!H za=>!Ha=>!Ha=>!Ha=>!Ha=>!Ha=>!Ha=>!Ha=>z6YYxcoq5o^Oapczdf2~ITef?AF ze{J2!`}wb}8+kwflyxJo!|ul}r)(Z<9+ER)>%i85tpmFbEC(zHEC(zHEC(zHEC(zH zEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC+Vq9FV_P*wHF~AF!iUey_iybw}Ti z?7yS$NA};*_apnu>)O#OuWLu^j=m1re@9=3>>oug&v&%S>)O#OuWLu^j=mq+e@EYs z?7yS$NA|bp4}1Qw=a1d@`NOV%yZ-I^x9i_>z;eKHz;eKHz;eKHz;eKHz;eKHz;eKH zz;eKHz;eKHz;eKHz;eKHz;eKHU~3M@|L%*fX^~qex|Z^NUG@HAy}wxRFP8mumHl*; z{dCp)i}n6uy}wxY(^dA`-}DdV%bku*-uy5 zPglLaSnn^^`-|;<>~hxT!R8@31GWxq9oRas>%elra=>!Ha=>!Ha=>!Ha=>!Ha=>!H za=>!Ha=>!Ha=>!Ha=>!Ha=>!Ha=>z6_ss$Qdw2Q0y7gTy*gV)gBxk^`3%3q-_B`10 zhnrWM2b+iF4A?rbbztkjt^>;f%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&) z%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%YmJl18z3c(sKTL z5zcT@pP5rXcGPmdxZ_Aaud0uu$LsCddc0VV6HC?O 
zbYHgXD&utB-f^=$R<*W>keu~a=?TaVM*#Zu$O>+NylbU!c4aiz+5sk*N%E~7TarAh-U0aVA>v3YKdYtadc3owhuG>3K_Lu$Q ztowSr-Y%A^$7}0xdb?O^+<3h`Zk+Cm?fJ~jpv{BLLvjXe9oRasbzs+l<$&dY<$&dY z<$&dY<$&dY<$&dY<$&dY<$&dY<$&dY<$&dY<$&dY<-qpkfE%6n-|y&g^7~4uaoeT; zzt)|7d^w&}{kn8tELFyfcXrjs*VgBg?NZ~mOJ9!@OV#6~FIBe3RXt8DRqrSLZLN9j zpVxLd&$f<>8=u$ydb?O3Ppqqqmnz$}rLU`u(^baB^=13^Rym&RCzdMxZLPAORK1_> zi>1nV@y@RL_}cn>vR!K2cIoSJVySwZ^rg!7xT?pArRx2pzpXW|{qx!`=h@bAapUva zUvC%d_STW^Kgo7oWn5f;XKk0`=&H}V zt(N2H@zU2dZk+UGe4KS(#!J=PwPl=C*{-W>*Hw?xmhsZxR?9f4vcFW_7fY3KQl+n} zY?rFHYs)yPvi*Ox>?bwuc+!`;t>g8$?cHCGm*Yu|+b(^nGG40mrAl9_^z*7RetWAN zSMMi%UE{_{U&hB-_hr0PyC1SruCiTf+;-{9IH}T)t1?ci9v9b_adF$FudD1QRkn+zN?%vKT`X0` zi+8lj@ubEbZ(Fy^IH~%0(wCaob{Qu%uko^7j~7eT6|<8)uPZ*R@(IC{HS zSAE{N?b4U=VqIms)VS@^mvK_1A6I3ZR6Q=PFXQ62OJ7&nPpWJeOO?K^db?Pvj2G`{ zmE%c`JKnZ#mvK_{@uV*`ukA8UYF^`IyB;r=s>j*$hno?b2b+iF4A?rbbztkjt^>;f z%K^&)%K^&)%K^&)%K^&)%K^&)%Yhxufl`sXj{d2&s(*d|l-k(8p1-U9Q$H7ZUAy{o z`Kv#N9lgG6f0r*d4>k|U8L)L=>%i85T?du}mIIapmIIapmIIapmIIapmIIapmIIap zmIIapmIIapmIIapmIIapmIIapmIIap|91|!?$-Y=|Ghe?TerCXBz>v7+xEZv{r=tO zlh0M^?tXsD4YvYp9&8?xGhpk$)`6`9yACV|EC(zHEC(zHEC(zHEC(zHEC(zHEC(zH zEC(zHEC(zHEC(zHEC(zHEC(zHEC(zH{+b+cM-Bh?BJ|&Hm}RD1wfWdR=EHKweaa^ASAUr*e5q#t)4>BpT%`ucp@js52zSLJoYRe2q8RbEG2)vrf8?!3~E zJFoQP&MSS(oxdh`Y+h|%lQUrJz}A7S1G^3^2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H z2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H2P_Bj$^rd%K=OAz@^?hG-Q|e>dnlV9 zH!rq6{u}EE+IiUZZS^j$s(ih8r4-#Nwgch|pvlKGHo^XKwoTi1)_t>x|Rl()9tZN1xi zx9=I21C|4p1C|4p1C|4p1C|4p1C|4p1C|4p1C|4p1Ako($j_za_Z3p*=k{XxdA^L7 zD&xdbWxTF3ep`<#PUIgX5zzF2D9IO$83@lxe@RsH*yD&zC2a-6)z$#xkp zRr+#Xc|B5PoUXE6_Scs2vRzxoS5@y@#z|lHm+?|%oLH)i*Hy-E>v3hg97kK9Pj8p~ z^f)=cjMt9aPsYo3sdD_d?J`cPjMp`<{bhSqyG~Oem8K@+HaecjA=2QAoWqQB?Yhc1-M8nD)H1QZ z+pxdiNi77s9_)HZ&Va20TL-od>^iUh(s?S}{yQ@BTIq$Ce+~vHMt1ka+9&8?x zGhpk$)`6`9yACV|EC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zH zEC(zHEC(zHEC(zHEC>GX9Ec)!-EC`?zq{JjI@lk7TdVwC)3(;L{qeW8M!VVj{kzwb 
zozGoQHV-xr$r-S9VC%rv!GGsE$m_W$ujib+o@?@Yj@i}cw>+~vvpoCn2EB(0hN?*|Ibbfd2cWy#76q^6zF;jyXL+-G<&Ayb z_IW2~z}A7S16v1n9as)n4pZNJ3rANHDE;|D|>70CMy#q_J8F16?ZO5B+pj#%lSFy zcdut<;>Vsd=Z-D2IOl!1@yiq6{(0sJ&rMpDbN+UN7k*Uz+l7hb(cb8`A~AcPf=8Cv ze|h5Fn>%K|lm&^@+iWx6zpv$q{HYhRBIo!$hL-Bz`mqIx{ZemaS)xn!Y&_6;RpPO? zPI}?SeHSO%q+ZCPM77j}E=}B)e7U!-&Y3?quZt6vlGD?3S>l?jlm9GCR7k#zM;0bt zubSL(VWMyLti4usW#X!_WzKu)ou3mgzmt4{3;n#!&zU!w4>#|N63I`k!SqFm=O!iJ z*>e9r=OQ<%wR&X*+L0A~pR37bTMK?yZv+`Oj@>&hg}Z zC68YI>qQAyS0<(|IpV3xQ|J5hEKZDByz%Z;pDauqmKv}uaYy#Mcxb``zaHi%GE)m? zVWM~TUQ3Qxnpm6s_-ilp^E*H1^_-UaV3+31&&jD5yuh!E`H9prd|*jpdG@Lq_{_qD zY_F4As0;jhTAo<_N%!neZ0r2GbvHk;zHV~CF3nj_scCp=UXEw9O)iurIj_sDgZYWn z^xZz+uj3_&P1*0Z`_Ki6#PE={=OEY4Zq*^4=Qi7iSrOZ~I; z^{1w-yL@ho{PBLvnGd&)=On|(uX7;;jJ9u3p^>ICw<*)y&#QX0h|JmfnuTQu< z%St3y^~||h{`20LI3smMulMti7P;#(%U|c46RG9)!kUDe|1AHxY)VW?P1GizE9(<( zJ!JXMXN!Mbn-d>bPtMGGf8B3Rxa%{^&(lV~Ue+a2an@i<&T-wk+~kkD)_<;B63JA% zdsSNGKF2>3?sHk6sFe!Be3838vJx&wHv7+iyoYyU{>(k`2ZN14Kf3?5fvV2Z#@z>o(zpno9uTS3hW}g!q6Unsu^UU== zSGM@;M&^BE;`3y@?zui^eq4@i_Q&0jvp(E)nijp9eYd#umCZgMH~6~Vf6n=$WO}@C z}^?9+>->;VV`{4?|-zxt( ztVp=yFZ1hbwSV8Ma_;-?^I75VhszVG+vZy<{ks3dUw2CrZk?~pxlg+L->QULN6T~W zuP#@XCsL0AZ>;lq@rQpdSe{7TE+1N+Gv6+^R^{A3+;hUBocpu8&gc2hVWI#07vO-T&t09N*>GVxNam%AonME{&C}f$$1@e zUw6+_^L_5E@~>-Q&i&b~N4alry`Rf{UG7`%d2X@4u2$tdS4~X4fQA0`&r75plm1+k zb6=PHjJu!BPb8mKu3xd(uZOhAJ;!bGIkP$EI(PG)7Ns6px%Q?%$hp@=dNI zx4yO{Zc09;6yB1P&+c_@$+;i7>u-yHom>2UGb`tQP2!;_f-({Zf%zAH^bfJ(h~x^V@QNzm$AimXp8kKDaFB`OxLd zvYh8}mtRFAcO4Xo-1S+|AHSe~?kN?ej&n!lD0Mr(rc{*r{xPv~)Pe=KJTxUwp1C_pevJA1w8Gx57Uc7K_~dxmYC6m2N|&$UVzn;~qXG?wl$o-~)d(N5HI7)pVJgtHIJeD<% z-19(jw=No$j$FPLk5bp|;iV&YKTC_;^J|v>p11Y6)O}Ab>*oE6vXT3qS2jw0E|-^! z+QeR^V?h(1~r$r<8bA_UjyT2Cq>%Bze?w{#?dUgS8% z{rgP!_qTLEAL%*k#I57>$URS%^y{L8o7Y>@-Mm&W;d8G<+2u>SDmlm5ko+;#>`HE)&aLP_-%3#`PkUE#Ieb(NzuqcE?)jif&ihZ@ zzq-`&*Soy0D*pcAU+;Id-TFVfieCp+{CrmS^HI^|-Ln;a{#0`JsiQ0T&$+r=XOCBk z+;dE2H?B@)H(y^>ch7SNRd(z02f1HVa`%m2Yr1*7zjEZB%PL3i=Yy5}eW%G@ zRocJbGXC={96Z^OZj;%<*&;! 
zZoSVb=|8vP{&~8nzwR^L=QO^HyYBYw?4B>~pB<&X=Y81Az28F9qty4^S5J2Lt^O0E z)Q_X5w{`36n}l0OS@lx$@YBn4-1Rtosyknc&MxOZo9^ygRa-}??^ELrai7P81Kf4E zp|88nZadlK#T5;s)MM*~=eYOTsX=NTe0soaw?4jUmPWKoA(>fckAGk?(Xw^tX*n8FSu@w`#d*xcCUA6 zTX+Ag(#(C1$4747>U4DdM8dtErZe38UU;6H$8S$f-B+(%Feh?9PHpI}%K_cob@g^% zcir53p<6F~4s?0dEW_PjKWrSOey;uUcz2v{ZgbBghxT&oqi0+9^Q{XL?)83qs9Rqz zpX=7?`xD&#{oda0bFbgreQ#WTwVT)W=S8XKy!YC;JUXLIl=_~K)yUmfUhC-A_si|v zeQn}-F8A}bcI)EJmTp~Kb7*RvUq13?cYT&T$L-g)otx)EP2Kxl)zrPtA4j|UMOt_F zzMt*u@@xJ@{<>)9Ue`lo+`LY1F*2OWbwZ{ro8P-22k`E;sHxA@#bSK5MSe zzZULwZ<^-b_o##2^`1Y&t+zLNx$CAy2Y3ATUETTL=;-ddZy)BKi(0pI*X3QuM5&+a zJlVmm(^IbW_pSQwdFP$_ZoNExk(J%z06)bAf=T;S&Kj?S(>?J~EX3eR!R9eW*-I)AHrv)pxmeh0sPdb)X9 z-Oc6gtP9=q*Dd@bKSb>Gs3-&lX|)1d~~C`UM?NxpF=WI`&AfznS1Viw~MleDrJwH^Mo4W2QHJp`N7x`Yk++F8;3{0K(%9n0&dEBX+ z%Z)z|ICZsdMkXmd%i#62)B-Jp5b!s_nGeg-RUy-dP~o7pL35+?zksi z=&rA6?)6^S>Q;B&9&_FGl$|&3`M0CXf$m-1`Hwl%9j9!vyXT&=7rDIb*T=1|S+m^wy=tbrzTdjUt()6Ny6+W*E_Um0@D=X5>Nv+; z$73%@T^~2qxy6k;ZKlhKio@M{yx+Z#tM<9weGj_7r+a?+Xq11BTH}9@w#NN@W_>9) zZ--R#&mZgj&tKO1@4uzodDd6;_y08s_k6I{{oL&A((XEcu&Vz)wATOJ=8r_``@nap z-&6mzWy^}r?s+Sp|2c>}?^km7`7`$S&n@}f`YN36)_IXiZe4xb*yZ-(GeNX7i~ zX#xLyxjFgzQu7p7-Mpo_-07At=eb*+SKYdh-+Sfr&rJFtL%UM}GBwtNBq{WqWg zK9|q0!~FiZ`Tg@@KL1>s&t0ESm-FB6i}-Ox-2L#lBFWD=`=71P(eCvY@Zal-y8G2* z1zpa}&F8-Fx31xzGt0X?`lh`9-nN&2zRvI0SwYudlFxsB<^AvH%J}b5C8O+<|9|H% z^*p+GUw8kytDb*OtLdKq<~8#_f2iiJ=buXZ=bNT(9^b9$zb_PZxpqT&m&fN<@UOq2 z|9MX}_j!KYIC8&Nsp7t8b}8-h?}VoQzF)&V4@@ua-)~v}eYcjI_oWTp`HNR^Ido`4 zcmF6~)xVAsZeAa2=D+{dbl2zcrQQ3OQqFxonRWc@tmUuss{VD=_dhqNmYlb=w6n`5 z>;HHDQa>+xzLcA<{1x2yxDqw|`aQtsR1N>Vxtja@UTNlXVMJXwkHxCH=e#Ey`t?@E zt-mMh`s=8s`}xHsjor_a3VAQD?sEH+7VdmU)OGi#+p4+!o^I}*??0*FexB2+iGQE6 ze`ELafy|ojIXkNEa{unuetsJH=e+vv`~%y&*Z2H>?mB(DrvLs}%iRa|Y~`QpYy0EY z$#ecD4INo@WU+2(Q#0}no!&e1h`QbKb<5ZNl!DVn9zLV*OEuEcs%C!z$$zS*{=Ze5 z2e%Mx9(Ip;xT&~%pH^)ik~3iIVE0-FRo&aQ`@hXYY6iA42g=6#Zzs?GWTQ@cj|E_u1)p@t8*UPSc|Nr*$@9NL*-+umo z`{%!_U;n@T^WW94U;Tkk@8lo&q^JJCr^Jk7GyBx-mS+EFGhj1dGY~%m{r5~uOKaMw 
zaq6GUzGMb@B-xMbPYxiDB9A7IA&(^ol7q;>#_PH9MsB>B8*k;t+qv;hZoHct@8!n(x$!}6e3%;_<;KUk@kwrc z8h>Z}Yq#(+{*3&b{DS=AVK3_3-EPSK!KEa(&uI_W{DM9?W2bV>!C(m|(8&?y^q$_1Uh zf=>CMvv<(hC+JiNI{OBl{en)#pi?R6R1P{-f=<<-Q!VIJ4>~o1PR*cGE9le?I(33h z-Jnx1=+qB74T4U?ptFC_X%uu02s({}PLrV1H0U%7I?aPli=cC0&}kWTS_PffLFb^L z(*-av$Zs${ET>D)&?FuRK8cDCMJ-k5N8Wd7$zj<-y8Bl!q!0Qy#87 zLisr5%43wrDvwhhubinoL3yI`B<0D7JmH0)^J36> zDd@Z$bY2NMuLhmhg3jwf=Z&EAX3%*n=)4_t-U&MI2A%hU&ilzuuIG&pls{DdNcm&s zPn17Z{!ICEuIbFGga!KV<%B7XdD3?_(r@WVPdF8#8_ff8(ysz?p$`zF>DOXmmqFhzE znsRmJ8p<`5Ybn=OuA^L6xt?-;7Ic~iofbjo zz@XDI=(Gwtt%J@%L8nd7IXLJX5_Ap?I*FhY1)a7*r(Mu#A9OkdosL1LQ_$%gbh-qc zu0iLppwlhr93FJK2b~^4r)SXV6?BdWI=zEVpP=wu{2x$^W#<$lWjl?NyvrF^vV zG0Mj(4^$qcJXm>%@=)br%EOgMC?BVMyz)rpQOYMMpQt=qd5rQ{<#Ed6l{1wmC{I+L zq&!*qB;}KpPf?zte5&$k%BL%zk?p_sw}f_dn*o~vn*p1F|78aFtb1lMz47#WR?s;+ z=$sRD&J8-}1)cMQ&eWhYE$B=SIu`_;8A0d5pmR~sxj5*|3_6ztolAqxte|sQ(78P5 z%nmwN1f4lS=gOcnH|Sgybgm9M*94twgU)q9=lY;?L(sV~=-d=^ZVo!P1f5%h&TT>G z_Mme|(77|{+!b{04m$S)oqL1MeL?5`pz}b`c`)ca6m%XAI*$aMM}y8|LFe(H^F+{j zGUz-Nbe;}6&jg)kgU)k7=lP)XLeP0J=)4qkUJg311f5rd&TB#E^`P@c(0McHycKlb z4m$4yop*!IdqL;@pz}e{`7r2w6m&igI-dlcPlL{9LFeCPehxbGg3kP)vmod!3_6Q~&f=i+OVIf>==>IRmIR%pL1$Ue zSsrv&1f7*ZXI0Qyo$TcLslpoNwaV+1*DL?7{D<$k$HI-{A*H*5hTvxfCa((3n$_ zd9d;j<)O;Ml!q&iP(Dui_@wd8dt}fV6?9GrIwuC5(LrZS&>0(a#s!`6K_@flOb9v? 
zgU+O&Gdbv-6m(8bc5>YxPf?zte5&$k%BL%zp?s$DS;}WCpQC)P@_EYVD^FFPraV1q ztd9$V&WxaQVbHlK=v*9hW(J*0g3hHuXI9X;Ea+SwbY=&gD}v6PpmSx=nHzMj3OZK@ zooj;5wL#~)pmTlDxgqG>7<6t5IyVQMTY}E5LFcxhb9>OaBk0^2bnXf|cL$w&g3i4` z=f0qGf6#d#=scM0jfA5(r@`3dDGm7h|6TKO5}XO*8*eqQ+nbamETf+TlpR3ca`5$eqZ?mY6QC2J(a;!ZEHn-p4`o6Vpo!2VXfkvXbTV`bGzB^pIt@AHgp9v2f7lP3ta_W4P66W3tb0Y z58VLW2;BtT4BZ0V3f%_X4&4FW3Ec(V4c!CX3*86Z4?O@q2t5Qn3_Sup3Oxor4m|-q z2|Wcp4Lt)r3q1!t54`}r2)zWo47~!q3cUus4!r@r3B3iq4ZQ=s3%v)u4}AcA2z>;7 z41EH93VjBB4t)WA34H~94SfTB3w;ND5B&iB2>k^849$b)LkpmV&?0Ct^b7PW^c%DU zS_&QE~6jVA}JO8_q9dUbRpnvxIbKfP);IkINMSb%%OD zJ)vIE5m0Za57ZaRfR2RvLH(ft&{5FQ&@s@l&_HMqG#DBJ4TXk5!=Vw-anSM5NN5yv z0(2rY8X5zQg~mbSp-gB3G!dEvO@>Z_PKHi_ra-4cr$MJfXFz8{XF+E}=RoH|=RxN~ zQ=w_lbm#(T26Q2G5p*#$6S@St6q*HH23-!#hOU6-KvzO@p{t;)p=+RPq3fXQp&OtZ zp_`zapp|7B?p>LpXq3@vYp&y_hp`W0ip?T1JXaTekS_CbIet~|4euI`kOQB`Z za%cs#5?TeVhSor9p>@!D=y&K3=uc<^v=Q0_ZHBU-El^ta-`#BeMwAcA4;6q4LWQ8h z&>qm9P!XsoR17K(r9&m4l29qAG*kvE3zdWRg33dCL;FA#pnakJpo&l>s4`RqstQ$u zszWuPnoupMHdF_y3)O?_Lk*yY(Ed;(=m4lO)C6h@HG`T%EuaITmQX9GHFOZv209oz z1UeK-KoQgyY6rE4IzSzvPEcp43)B@l4C)3Q4t0lmKs}*e&=F8?s1MW^%7Bi9`a#LR zBix#21E8azqoHG0!@KVg-(M`ht7b`gwBG_hR%V`h0cS{ho(Z)py|*B&|jUqW9&UqjzO z-$LI(-$Oq@KSDo2KST4N`OpGrA+!iu4E+NA3jGExftEtcpykjCXeG1?S`Dp%)g3{`=uLe-$^P>pPD&+kd|8q2@X&1>}T zzJ2R=lA8EbYC*N3I#6Ax9#kJ{05yd6hZ;c#K#id$P*bQG)EsI79SF6AT0yO$gP=Ch z!O$Vlp-=*fptevus6EsH>Iij$IzwHcuFzpnH|TJvJJbW}3H5@GfOJJToj)IPcj)9Ja210|N!O#$BC^QTj4vm10gN}zrLZhG)pcA3d&=_beG!7aMWkM66 ziO?iyGISDjGIR}*33QdEiLl;0ZpbMdkpo^iI z&?V5N&@AXO=yGT_bOkgAx)PcTT?JhYT?1VUT?btc-2mMP-2~kX-2&YT-3Hwb-2vSR z-38qZ-2>eV-3Q$dJperjJp?@rJpw%nJqA4vJpnxlJq0}tJp(-pJqJAxy#T!ky#&1s zy#l=oy#~Dwy#c)my#>7uy#u`qy$8JyeE@w3eFS|BeFA+7eFl9FeF1$5eFc3DeFJ?9 zeFuFH{Q&(4{RI6C&4cDc3!sJ2B4{!63-l}W8?*#k3N3?{Lo1+_&?;y(v<6xWt%KG> zze9gOe?l9ejnF1&Gn55wfzq=74IW#+7372RLj|CMP$8%=v7u5M zo41-!0xAiWf=WYWpt4XoXfLQdv^TU5Q~}x-+7GG-Re~x*RiLU+HK;mN1F8wtf@(u` zpt?{!s6NyHY6$HQHG&R+8beK>rcg7eIn)9=5NZjvf?7idL2aOep+lfUp#&5`ZJ~Bh 
zd#D4{5$XhWhPpsqp~IkV(BV*bs0Y*&>IEGE^@jRDeW48KNT?r_{5KM%@+_4M1E8az zqoHG0!@KVg-(M`ht7b`gwBG_hR%V`h0cS{ho(Z)py|*B&YoT?}dgyoP z59m*51GEv^1Z{@0pe;~Z5q$rH@0r-Fti7>CsYI~3KfHjL+MZns3cShDh-u^ z%0lI!y`b{Y-q1c!1!!MrKd2&9391ZLfvQ5)pz2T!s3uekstwhF>O%FP`cMO?A+$f# z2s!|23^jq8Ld~G&Pz&fls3p`2Y7HF(wSf+X4uKAZ5>N!Sh1x;wp$H>9z z4uiTuheO?=9#BuH7jy*F8|nk~g)*Qcp?*+*XaIB+bTo7fbSyLw8Uzi7hCoB1VbE}B z1aus9JTwv-1)Tt$2#touKx3hC(0C{lngC6NCP9;-lc1BKQ=lo(snBWA>ChR_nb29# z+0Z%AxzKsg`Os8o8Z;fc0Ga_^2wenS49$ctfi8t+L6y5$`UCnC+5l~YHbI-AENBaqRutd=p!`q) zs324bDh%xb?FkiuibBPp;!rwN0xAiWf=WYWpt4XoXfLQdv^TU5Q~}x-+7GG-Re~x* zRiLU+HK;mN1F8wtf@(u`pt?{!s6NyHY6$HQHG&R+8beK>rcg7eIn)9=5NZjvf?7id zL2aOep+lfUp#&5`ZJ~Bhd#D4{5$XhWhPpsqp~IkV(BV*bs0Y*&>IEGE^@jRDeW48K zNT?sw9~uB11sx3?104$uga$!_p&`&vXc#mc8UY;#9S@C!MnNY)Cqkp4G0<3O95f!v zgeE`}p-Ir>Y-RtibLRXz&rZVDlc7_fDbT6VY0&A=8PJ)~SXLRA?GB z9l8LT0bK}P1YHcxgf4+Dg=RsQL6<|bp(~&{(3Q|!=ql)H=o;u+=sM_n=mzLU=qBi9 z=oaW!=r-tf=nm*k=q~7P=pN`^=sxIv=mF?K=ppD~=n?2q=rQPV=n3da=qc!F=o#o) z=sD1N=pE=?=soCt=mY3O=p*Q3=o9Eu=ribZ z=nLpe=qu=J=o{!;=sW0p=m+S>q_Y2tFs*r$*3FwWZa(2B-S}BI=IO?K-B_R-3w2|W zZYPH;U*+QQat}8^v`a zT{lYTMoHZ$r5mMnql|8p)s1qxv6pU?*Nwe(V;|k9pd0(@#(ug{Q8y~-MrGZoq8n9p zqnd71*NqywQByZ+=|*kcsG}Qob)%kc)Ypv$y3tTK_ScO@x^aMRG}etKy3tfOn(0P! 
z-Dsg32kJ&k-Dss7t##ud-Dsm52kXWmx^bv(By=OvjkdbcPB+@?MhD&Ks2iPhqqA;w z(T%RUahPs&(~ZM*qq}bO(2btD(Mvau(2d@@(MLD>>PCic9H|@qbfdp+4A70EbmM5< zI7T;))s2C=F-SKC>&6h>7^)k?bYr+~jL?nabmMs47^xehbmIiwI8ir7>&6(}7^@rO zbYr}3Wa`EQ-I%BwlXPRUZk(hWC+o&3x-msJPSuUmbmMg0I72ti)Qz)r<80kHM>o#Z zjq`NleBGF;8`E@Sx^7&c8#8p{LfyDXH!jwVnYwX_Zd|GxvvlJ!-MCygX6wckx-myL zuGEdWx^b0mT&)||=*G3lM$XTXu7j?JZh&rtZh~%xZh>xvZi8-z?tt!u?t<=y?t$)w z?t|`!9)KQ%9)cc*9)TW(9)li-o`9Z&o`Rl+o`If)o`ar;UVvVNUV>hRUV&bPUV~nT z-hkeO-h$qS-htkQ-h?2 zeu936=0WqJ1<*oh5wsZk1^N~G4O#*%g_c3fp%u_dXce>?S_7?x)sVy4>f=qLi`S0Yy+-s2$WE>Hu|wIzgSGE>KtKFsK`J zIMf~L0riA>K}SHnp*~PwC<8hY>Ie0Q20%waM?=Rz$3g?4LC|1m2s9KL1`UTsK*vGH zLnEP4&4xIs=37rL<4V?p> z3!Mj@4^0)N{=3T4py|*B&|jUqW9&UqjzO-$LI(-$Oq@KSDo2KST4N`OpGrA+!iu4E+NA z3jGExftEtcpykjCXeG1?S`Dp%)y$g0#${oLDiueP)(>7R2!-T)rIOo^`Qn(Luh}f5p)357-|AFg_=Rlp%&1AP)n#4 z)EYVnY6Be%9ReK+C7=ju3$=sVLmi-wqOKE0O`OmP>I`*(xCnhv<_Mi{SN&B{RwS=HbR@A%}^G!1xhR7zyGD49P>f>p#o4rs1Q^b+5_4X zDgqUSib2Jpbf^SW5-J6ihRQ%?p>oh(P9me1#}?P5^4pth7N+-KnFvIK!-vJ zD1zET?V$Eh2dE>|3F-`Wfx1G6LEWIkq3%!*s3+75Is)nq^?~|A8PJhXKd3)606Gdf z8af6#He3F`<(SIFf!I0-8Vn7AhC;)j;m`=^IOuq2Bs2;-0Xh*H4UK`uLgS$EP$o11 zng~sTCPODdCqt(|Q=n6!)1cF#GoUk}v!Jt~bD(pf^Puygsn9fNI&=Xv1G*5p2)Y=W z30(qR3eAEpgD!_=Lsvj^pev!d&{fdY&^6Gt&~?!D&<)Uy&`r?I&@Ird&~4D|&>hg7 z&|T2o&^^$-(0$PT&;!te&_mF}&?C^J&|}c!&=b&;&{NRU&@<4p&~wo9&PU3&|A>k&^yq((0kDP&oh(P9me1#}?P5^4pth7N+-KnFvIK!-vJD1zET?V$Eh2dE>|3F-`W zfx1G6LEWIkq3%!*s3+75Is)nq^?~|A8PJhXKd3)606Gdf8af6#78(c*f(AoFprOz( zXgD+iIu1G>8VQYpPJm8?MnhwuvCueZJd_DdfF?qdpvlll(8jUGV}`cD)bukI`jtgCiE8cHuMhkF7zJs zKJ)?fA@mXSG4u)aDfAiiIrIheCG-{aHS`ViE%Y7qJ@f<&Rm4r$`rJ*uVS*RSe7gQeF8`=k|0PPFy2UUbB zL6xB@P*tcJR2`}T)r4w6wV^stU8o*ZA8G(Kg!YFTK?gvMp(apMs2S88Y5^SxwS-zh zt)YXUHqgP)A<&^v0*auvP&=qS)B)-Ub%HuWU7)VeVNf^daHu=f1L_I&f{uWCLw%sW zPzH1))DP+p4Sh$t)VuMC)5^d2YEr>PHzsd9idK; zALI`OK!H#Y)ENqfLZDEn3ls)*g~Fi-s2kKB>H+nHdO^LRK2Tq%AJiWj01bo&L4%aLit%O!VtD!Z}T4)`#9@+qHgf>B&p)Jr>XdAR0+5zo^c0s$L zJr}%jN2RTD7kSpW{MS-G1(V*y13@9cP3yKZJf#O2(p!iS% 
zC?S*xN(?1|l0wO#&y*fPA5jP$$R_@`nPTKqv_63JJTo210|N!O#$BC^QTj4vm0DLZhJ3&=_beG!7aMO@Jmslc34a6lf|m4Vn(kfM!Co zpxMwIXf8Alnh!027D9`l#n2LHDYOh)4y}MzLaU(F&>Cnhv<_MiZGbjHo1o3m7HBK9 z4cZRvfOba8Ir9H3;U2t8dbji*>AlkXr1wi7kUl7VNcyn!5$U7S$E1%-pO8K&eMATYRr0+{VkbWrrNcyq# z6X~bY&!nGAzmR?@{Yv_^^c(57((k0-OMj65DE&$Lv-B6~uhQS7zf1p+{we)S`nR-G zg8%y*`A6DW+C|z`+D$r&bX4hR($S@3NXL|pB^_Hjj&xk15K$rBg_!lujj`S~`t%TIqDs>7_GBXOzw)omo1IbXMtX(%GeRNavK!C7oM3k91yX zcj0;8wrAtVclrAM*TDpvMS?O}p<)te~SNzj|-l6sH zSC~q2sVrSZx~gFUxoq-#polCCXXN4lhn-9F&}!qnhtn@hP@zN8dCrVF}o-92@daCp^>FLrlq-RRclAbL+M|!UGJn8w;3#1oHFOps? zy+nGc^fKw?(krA_O0SY$Exks1t@JwS_0k)pH%f1k-YmUEdaLv{>Fv@xq<2d1lHM)7 zM|!XHKI#3^2c!>5ACf*SeMI`G^fBq<(kG-(N}rNGEqzA%tn@kQ^U@cjFG^pMzASx3 z`l|Fb>Fd%rq;E>!lD;i{NBXYxJ?Z<>52PPTKazed{Y3hy^fT$_(l4Z6O23kRE&WFN zt@JzT_tGDvKT3a+{w)1P`m6Le>F?4%q<>2PlKw62lu%v)NIOfrNV`hANk@^6DjiKa zx^xWbn9{MNV@t=8jw>BcI=*xQ>4egWq!UXgkxnX|Ogg!A3h9*6siad&r;$!8olZKv zbOz~+(wU?)OJ|YJDxFO_yL1leoYJ|Zb4%xu&MWOMoliQyw1;#7>4MUQqzg+IkuEA- zOuD#q3F(s3rKC$smys?jT~4~ZbOq^((v_quOIMMuDqT&wx^xZcn$oqTYfIOWt}9(n zy1sM+>4wscq#H{&k!~v8OuD&r3+a~9t)yE^w~_XgZY$kR+DqD7y1lfIbO&i)>5kH! 
zr2VA*r30h`rGun9O9xAbNQX*ykq(pYDjhBzA>B>7yL1ofp3=RfdrS9`?kn9-y1(=Q z>4DONqz6k6ksc~NOnSKV24nmZq!&vskzOjjOnSNW3h9;7tE5*;uaRCWy-s?)^akmT z(wn3=OK*|hD!om5yYvp}ozlCccT4Y)-YdONdcX7m>4VaTqz_9Ukv=MYO!~O=3F(v4 zr=(9ypOHQ*eNOtk^abgQ(wC$!OJ9+`Dt%4*y7Ud{o6@(WZ%f~izAJrC`o8o7>4(yf zq#sK^k$x)uO!~R>3+b2AucTi~zma|`{Z9J5^atsW(x0S1OMj96D*a9RyYvs~pVGgi ze@i%bS&xE(s88YO2?CqFP%U-p>!hY#L`Km zlS(I(PA;87I;C_f>D1C`q|-{LlTI(4K{}&!Ch5%5S){W{XOqq@okKdObS~-K(s`uw zO1n$vlg=;gAzeVapmZVW!qP>gi%J)hE-qa{x}C)0=q{~W|lP)h^LAs)JCF#o2 zRivv*SCg(TT|>I2bS>%H(siWkO4pOFFWo@8p>!kZ#?nosn@Ts6ZZ6$Ix}|h0>DJP1 zq&=nEO1G2tlJ=HvFYP1ULE2ZkqjV=}KWTsI0O>&KAnDH1!O|hpq0(KX!=$@Phf7CD zca!ce-9x&kbT8@N(tV`+O81lQFFindp!6W=!O}ydhe{8V9xgpXdZhFy>Cw_-q{m8+ zlO8WUL3*O}B3R#Pm`W5Jwtk?^epMw(sQKeO3#y?FTFr|q4Xl@#nMZpmr5^_ zUM{^tdZqL#>DAI}q}NKXlU^^qL3*R~Ch5)6Tco#2Z!_r5jk4hhtJ}!Mi`lR$J>C@6@q|Zv9lRhtfLHeTfCF#r3SER2>Uz5Hr zeM9=D^eySz(s!iqO5c;dFa1FJq4Xo^$I?%vpGrTIelGn&`la+M>DSV4q~A)vlYTG# zLHeWgC+W}9U!=cEf0O<${X_bv^e^e((oTux`Mw3~Dk>8R4tq@zp6kd7%G zOFFi69O<~y@ucHRCy-7kok%*dbQ0;L(#fQgOQ(=dDV<6>wR9Tkw9@IM(@STN&M2Kp zI8#S(q_a!skj^QcOFFl79_hT&?$Y_B^Gkb37mzL}T}ZmHbP?&I(#52UOP7!? 
zDP2msv~(HiveM8jGzq^nEUkgh3ROS-mn9qGE#^`z@dH;`^9 z-AKBzbQ9^O(#@osOSh12DcwrCwR9V4PwBSO?WDb=y`|er`$%_?_Lc4^-AUR{+Fv?A zI#4=Dy0dh!bcl4QbQkF`>8{e@(h<_#q`OP^knSnnOS-poAL+i*{iOR#50D-xJxF@6 z^bqNx(!->OOOKEqDLqPhwDcJ1vC`wD$4gI;o+v#@db0Es>8aAwq^C>Ike(?$OM15S z9O=2z^Q7lXFOXg+y-0eo^b+Z%(#xcmORtb#DZNU1we%Y4wbJXP*Gq4Z-YC6Edb9Kv z>8;Y+q_<1&klrc1OM18T9_hW(`=s|vACNvMeMtJS^bzT!(#NEaOP`QFDSb-%wDcM2 zv(o3J&r4sBz9@Z3`m*#D>8sM$q_0cgkiIE>OZv9-9qGH$_oVMjKahSX{Yd(;^b_f) z($A!yOTUnQDg8?Nwe%b5x6<#V-%Edx{wV!P`m^*G>95k?q`yo5kp3zCOZvC8QxbXp zFYPStBJC>eCLKjOs&q8z=+ZHyV@k)8jx8NWI<9m)>G;wKq!UUfl1?n0L^`Q-GU?>f zDWp?Mr;<)BoklvXbUNwu(ix;PN@tSJES*I=1>c$CDaOP4Yh$h zp|(&v$P4m@+Cx522gn!d2z7$|Ab%(T3WS27&QLHE0);|dpfIQ_6b?l|-JtGJ52z>9 z3+fH^f%-!Ip#IPRXdpBQ8Vn7AhC;)j;m`0p!?7R=ppn7dJH{*o(8Y6>-j znnNw1mQX9GHPi<3gxW&wATP)pY7hB99Uxz*Bh(4Zn&?V?HbOpK!U4yPeH=vu)E$B9M2f7R0gYH8Qpoh>S z=rQyJdI~**o?|iJ>G=QYaae97+MDgi=AN zp)^ohC>@j@$^d1AGC`T4EKpV`8y$g0#${oLDiueP)(>7R2!-T)rIOo^`Qn( zL#Pqd7-|AFg_=Rlp%zd}s1?*2Y6E#fZJ~CM7vv4KhkT$8kT28`>IC^g{!joE2n9i% zp?S_7?x)Vz3?S=M1`=JBS zLFf>47&-zSg^oeTp%c(a=oEAsIs=`B&Ozs)3(!UA5_B250$qi!LD!)h&`szTbQ`(@ z-G%N!_n`;SL+BCo76g=oR!DdIPQD`+CR7Wm z4b_3_LiM2fPy?tT)Cg(}HG!H!&7kH`3#cX33Th3tfjptMP&>#A@`l<&K2Qh97wQOg zg8U$VC;$qCf}qY&FcboXLS3LRs4El>ML^x4?obb?C)5k-4fTQgLj9or&;V#4Gzc0D z4S|M2!=T~N2xufU3K|WKfyP4Ppz+WIXd*NTnhZ^Wrb5%8>Cg;lCNvA04b6e(Li3>c z&;n>7v(CA8CUgtB4c&q6LieEi&;#fp^ay$kJ%OG=&!Fef3+N^E3VIE_f!;#zp!d)R=p*zA z`V4)6zCz!i@6ZqEC-e*Y4LPO2>p#dDa)DeSHz*1e6^aH$hhji6p;%CCC=L`CiU-Aq z5C`tuI8|Gk4O39d<@WKeP_1(Xs>1*L}4Kxv_LPlo84VWrngqS)puD zb|?pw6UqhUhVnppA$KSrlpped3P1&+LQr9-2vig*1{H@&KqaA4P-&H>v9U7>I&0_p~Jhk8Ig zpIe0Q20#O$LC|1m2s9KL1`UTsKqH}1&}e83G!_~MjfW;c6QN1aWM~RB z6`BT3hh{)Cp;^#uXbv`*reF3*rM2~*rwR7*rC{|*rnL5*rV91*r(X9IG{MFIHWkNIHEYJ zIHowRIH5SHIHfqPIHNeLIHx$TxS+VGxTLtOxT3hKxTd(SxS_bIxTUzQxTCnMxTm&oBDo@kBBdggBDEroBCR5wBE2GmBBLUcBC{fkBC8^sBD*4oBBvskBDW%sBCo<- zkx!9d;h`v?D5xl;D6A-=D5@x?D6S}>D5)r=D6J@?D61%^D6goXsHmusH&)@ zsII7?sHv!>sI91@sH>=_sIO?CXsBqUXsl?WXsT$YXs&3XXsKwWXsu|Y@Km%_v{QH~ 
zycO*gK8g+sUqwenCxxHFUlE`PR0JtHD}ogvicm!tMVO+iB3u!n=%(nd=%MJT=%whb z=%eVX=%?tf7@!!a7^E1i7@`=e7^WDm7@-)c7^N7k7^4`g7^fJon4p-bn53Ajn4*}f zn5LMnn4y@dn5CGln4_4hn5USpSfE&_Sfp62SfW^}Sf*I6SfN;{SfyC4Sfg00Sf^O8 z*r3>`*reF3*rM2~*rwR7*rC{|*rnL5*rV91*r(X9IG{MFIHWkNIHEYJIHowRIH5SH zIHfqPIHNeLIHx$TxS+VGxTLtOxT3hKxTd(SxS_bIxTUzQxTCnMxTmDNRgq1RU6DhPQ;|!NTaibRSK+S6r^v7HP!v!U zR1{JaRuoYbRTNVcSCmkcRFqPbR+LecRg_bdS5#0`R8&$_R#Z_`Ra8?{SJY6{RMb+` zR@70{Rn$||S2R#GR5VgFRy0vGRWwsHSF}*HRJ2mGRAKQ1n#vQuJ2zQS?>xQ}kC1 zPz+QIQVdoMQ4CcKQw&#(P>fWJQjAuNQH)iLQ;b(kP)t-zQcPA%QA|}#Q%qOPP|Q@! zQp{G&QOs4$Q_NQ^P%Km|QY=<1Q7ly~Q!H1kP^?s}Qmj_2QLI(0Q><5PP;69eQfyXi zQEXLgQ*2l4Q0!FfQtVdjQS4RhQ|wn9P#jboQXEzsQ5;nqQyf>EP@GhpQk+(tQJhtr zQ=C^^P+U}8Qe0MCQCwAAQ(RZvP~249QruSDQQTGBQ`}cPP&`yTQan~XQ9M;VQ#@C^ zP`p&UQoL5YQM^^WQ@mGvP<&K;QhZi?QG8W=Q+!waQ2bQ zh@yz9h^C0Hh@pt7h^2_Fh@*(Bh^L6JNT5ilNTf)tNTNupNTx`xNTEonNTo=vNTW!r zNT*1z$e_rm$fU@u$fC%q$fn4y$f3xo$fd}w$fL-sa98A0L}_e>M80g z8Ymhn8YvnpnkbqonkkwqS}0m7S}9s9+9*5~Z58blUJ7qTdxejpgThzQQPD}^r|?$< zC;}Bhiq48)MTjC)(M1ua=&A@;L@2r`x+{7pdMbJ;dMo-U`YQS<`YQ$~1}X+A1}lar zhAM_BhAT!WMk+=rMk~fB#wx}s#w#W$CMqT=CM%{WrYfc>rYmMBW-4YWW-I0><|^hX z<|`H`7Ah7g7Aux0mMWGhmMc~$Rw`C0Rx8#h)+*L1)+;tBHYzqLHY>I$wkozMwkvih zb}Du$b}RNM_A2%%_A3r34k`{Q4l9l*jw+5Rjw?I4fKft_n9r z6h%};G(~hp3`I;uEJbWZ97SA3JVks(0!2baB1K|F5=Bx)GDUJl3PnmqDn)8V8bw+~ zIz@U#21Q0iCPijN7DZM?Hbr(t4nLQzstN>N%-Mp0H#PElS_K~YgrNl{r*MNw5zO;KG@Ls3&vOHo@3P_am{Sg}O0RIyC4T(Ls2Qn5<0TCqm4R*3QL#y}S+Pa2Rk2O6U9m&4 zQ?X02Td_y6SFumAUvWTjP;p3cSaC#gRB=pkTya8iQgKRgT5(2kR&h>oUU5NjQE^Fe zS#d>iRdG#mU2#KkQ*ldiTX9EmS8-2qU-3ZkQ1M9dSn)*hRPjvlT=7EjQt?XhTJc8l zR`E{pUhzTkQSnLfS@A{jRq;*nUGYQlQ}IjjTj7*eU;ika6)p-_g_|OZBB~;qBDx}m zBBmmiBDNxqBCaByBEBMlBB3IYBC#TgBB>&oBDo@kBBdggBDEroBCR5wBE2GmBBLUc zB6B3%{=74^YCx#BUvL&OE18YVPUawUlDWv-WF9gv=}zV&^OGKA0kR-jh%8JNA&Zj5 z$l_!PvLsoGEKQan%aY~D@?-_FB3X&7OjaSQlGVuSWDT+=S&OVq)*_~PZ{YZZ@fD9yq$j)Ri z8A67VUC1!9D;Z8kklo1cWDl|@*^BH=_96R{{mB000CFHXh#X7~A%~K~$l>G&awIv5 
z98HcP$CBg7@#F+@A~}hiOim%ElGDiPwA)k`Z$miq>@+J9-d`-R~-;(dh_v8oiBl(H^OnxE1lHbVh9k;TapWJ$6VS(+?EmLyh=z24q9B5!sk*LN+Cvk$sS}+vKQH#>_he?`;q<00pvh(5IL9} zLJlQ|k;BOm{0<5xJOLLM|njk;};y&Xq|MsgFmncPBdCAX2=$sOcQau>Oq z+(Ygq_mTU_1LQ&S5P6t9LLMcLk;lmsw~z9rw0@5vA3NAeT- znfyY2CBKp1$sgoT@)!A=bV|?HzoawiLb{S}WE3(g8I6oi#vo&ovB=nD95OB$kBmyu7A?uR$$ogahvLV@sY)m#Go084Q=41=9CE1E>O|~ID$+l!W(u?#a z+mk+I2hx}9NOmIqNPjYb3?zfd&SWqdLWYuE$S|@i8BRu!-N^1_53(oOi|kGIA^Vd3 z$o}L2av(W~983-&hmym{;p7N%Bsq#4O^zYQlH~av`~hTud$@my*lK<>U%-CAo@RO|Bu=lIzIz=0@)7Tk$tUDf@)`M@ zd_le>Uy-lLH{@IL9r>R8Kz<}Yk)O#g~qze%SId<9H8lP;ty=|)B& zqmt3c=wu8sCK-#2O~xVPlJUs+WCAiFnTSkGCLxoO$;jkn3Nj^`icC$WA=8rS$n<0e zG9#IZ%uHq>vy$1!>|_oyCz*@PP39r^ z$kJpPvMgDSEKgP-E0UGS%48L?Dp`%JPSzl6lC{X%WF4|DS&ytwHXs|4jmXAi6S67U zjBHM}AX}2H$kt>V(vxgUwj;erZ?Zk1TgUG?;5OOFvj2upmAV-p;$kF5&ax6KH z98XRlCz6xM$>bDrDmjguPR<}_lC#L!~5^^cIj9gBxAXk#B z$kpT;axJ-zTu*KwH@-6v}d{2HLKa!uw&*T^KEBTH5PW~W&lE28`q*F$| z{w1AB7t)nCo7N@$x38pvI<$1tVUKRYmha`T4Zgq4q2D1N7g4B zkPXR3WMi@k*_3QXHYZz_&DcdyqZJUSw~w580RONA@QNkORp<oJLM3XOJ@^8RO6Qamohf+TedZpZLMWn zXW7PTedfr?X6{dXW8Ccwhxx=qhHECv}~m;TWQNy#QY~?IldCOM8vQ@Nfl`LCj%T~p* zRkdu@EL(NUR>QK@v~0C3TW!l$$FkM6Z1pT#eaqIsvNg19jVxPZ%htrQHMMNbEL(HS z*21#2v}~;`TWibK#KCqb%EK%QnWcjkRp!EZca?Ho>w@ zv}}_s+hof&#j;JcY||{;bjvowvdxU#qW}5(zdD{?|NZMfbDC{VbIfV3In6Vt`R261 zoEDnXB6C`7PD{*bsW~k(r{(6f!kku`(<*aXZBA>JbJ}fAd(3ICIqfs2{pNJQoDQ1PA#*xxPDjk?s5u=or{m^y z!kkW;(8v@OGpF8&}vGpG0F z^ue4yn$st9`fN^L%;~E+eKV);=Jdmyewx!SbNX#gPMM7_tpAvkvpKn#ldCzonNt*V zifT^L%qhA##W1Is<`m1EVw+PObBb$D@ysc{IVCWsgyxjUoD!Q;5_3vwPRYzExjCgU zr>l*ybjn^P8Z%4$y8%qhD$qeC4>hq{gq^&B1QJ32IQbZF@4 z(8$rDv7$A;{68v!g?>qeFRPqc74PIkTYt3n$IjuLR4d%4boHm)$ zW^>wNPFu}sn>lSaryb_B)0}ph({6LxV@`X`X`eamH>U&UbkLj*nbTo&I$};o&FPpq z9XF>F=5+GU^yl~Y@MGtc`Olp;r!(es)|}3n(|L2cU``j!>5@5JHm57*bk&@$nbUQ1 zx?xT?&FPjo-8QE?=5*Jb?wQklb9!J-56$V3IXyO~C+76joSvD}b8~uOPA|>rl{vjO zr#I&G)|}p%(|dFJU``*+>61BqHm5J<^wpfcnbUW3`e9B#&FPmp{Wd43EXEgsf6U3* zoLtPw)tubSDT+BoHK%Ci6y2O+m{Ux1ie*l*%_)vK#WkmR<`mzY5|~p$b4p}RiOngA 
zIVClxWagCIoKl!mN^?qOPN~f)jX9+?r*!6&-kdU+Q$};jWKNmQDT_H}HK%Oml--rcg7e zIn)Ab3AKV+Lv0{Ws4dhE@`Ajf_K*+M0rG`9LY*Sz7WsMP__^O0c0Q>_Uc-UCD4Vg6u|iCwq`R$zEh{vJcsp>__$|2ap5FLF8a^2sxA-Mh+)OkR!=a zvy$1!>|_oyCz*@PP39r^$kJpPvMgDSEKgP-E0UGS%48L?Dp`%JPSzl6lC{X% zWF4|DS&ytwHXs`+!-5-;jmaitQ?eP^oNPh1BwLZK$u^`X*_LcadXe5_d(wyOK>Cs$ z$xfsn=}!ibfn*TbnG7aF$WXFNB>(e0qCc<7!~XOA#V~W~YEI$i6k$%?%&EIM^)RQN z=G4oadYe-pbLwkO{miMqISnwUf#x*GoCcfI5OW%8PQ%P;xH*k5r;+9~%A7`<(-?Ca zYfj_LX}md2FsF&;G|8MMo6{6?nrcqd%xStg%`m5#<}}NkW}DL-bDC>T^UP_!IV~`! zh32%#oEDqY5_4K=PRq<`xjC&cr{E^>l($nMzyZdtZQ`Z8pFEAw63wNYi#Qp$GXO~uJNpEeCwLP zx+b))iL7hl$Tjl2_3i%ruG8QoP*NxvlpIO{rG!#Jsi8DbS|}Zq9?Ae^gfc;yp)62V zC>xX=$^qqsazVMFJWyW99m)sghdiJHP(i2=R2V7(6@`jH#i0^VNvIT58Y%;og~~zY zp$bq%s1j5essdGoszKGE8c)A_xlzLGz&n&_ZYtv=~|fErpgr%b^v}N@x|d8d?Lbh1Nmq zp$*VRXcM#<+5&Bbwn5vW9nel_7qlDN1MP+OLHnTt&_U=BbQn4U9fgiT$DtF@N$3=G z8ae}=h0a0ep$pJO=n`}px&mE=u0hwK8_-SY7IYiB1Kox0LHD5t&_n1E^cZ>qJ%ye@ z&!HF4OXwB!8hQi0h2BB$p%2hU=o9oA`T~80zCquiAJ9+e7xWu)%C4_${(+n!7swTI zgQ7rDp=eNaC{R*HzYa zwRK%%UDsOIb=Gygb=_cHH~v}wyi42oFTBb6Pj9xaTdeC=>$=UlZnv&Gtm{tey34xm zwyt}u>t5@+&${l9T%-T_sQkYV$phwe(3}pL(_wQuVopcR>6kekH>VTkbkdwonbT== zI%7^}&FP#uoj0co=5*1VE}7G1bGl+qSIy~~IbAoW8|HM=oNk%ZZF9O~PIt}eo;lq& zrw8Wr(3~Eb(_?dbVop!Z>6tk_H>Vfo^wOMOnbT`?dSgy+&FP&vy*H;1=Je5=KAF>J zbNXUVU(M;8Iej;$ALjJaoPL?pZ*y|WVLS}|V@}TI)=HzBhQOqf-IYl$4=;jo| zoMM_&EOUx&PI1gBt~tdsr}*ZSz?>4AQzCOpY)(ncDXBRnGpFR{l){`+no}xsN^MSQ z%qguor8B4W=9IylGMZB+bINQ^SL^ zBlD9UWC5}uS%@r579oq0#mM4h39=+viY!f*ASPVF zCRvNDP1YgnlJ&^?WCOAx*@$dRHX)mm&B*3t3$i8Iifm1`Aw9{qWINJ}^d{SrK4b^d zm+VM(BK=5zGJp&ugUHTgFd0IIl3mC!vMU)*Mv&de?qmw3r;^ji>EsM@COM0oP0k_b zlJm&<(2pxhBLr0*a&@t#ZbOJgFoq|q7XP~ptIp{oe z0lElXf-XZ>psUa|=sI)*x(VHaZbNsVyU;!8ex&~MJoZ1of93)CkbFcwCZCW`$!Fwq z@&);ld_}${-;i&~cjSBW1No8sM1Cf}kYCAfM$B<*m zapZV%0y&YKL{28BkWw<-1G$mhL~bUxkXy-ZwA)k`Z$miq>@+J9-d`-R~-;(dh_v8oiBl(H^OnxE1lHbVhyr)0hGZkMG1-J{N;V^#lP$=WWGk{Y*@pBa+mh`_FVdTAPx_D@NMEue*@^Tc z{mB3_kPIR_lfh&N8A^5`!^o~=I2l29BfFD5$ev^`vNzd>>`V3|`;!C6f#e`^Fgb)A 
zN)983lOxEHk#zp^-9wN3!K0){OOKHrD?Ltnyz~U=iPDp#CreL}o+>>}db;!s>6y~A zq-RUdk)A6(PkO%e0_la)i=-DzFOgm zH%o7k-YUIKdb{)v>7CNMq<2g2k=`r4PkO)f0qKL%holcnACW#PeN6he^a<&c(x;?P zOP`TGD}7G-yz~X>i_({*FH2vMzAAl9`nvQD>6_BGq;E^#k-jT^Px`*}1L=p-kE9<< zKaqYa{Y?6~^b6^i(yydnOTUqREB#LTz4Qm^kJ6u{KTCg+{wn=V`n&WG>7UZSq<>30 z<^I3N*ngy*rCp?5rQM{XNJo{9CLLWmhICBnSkkd0JMyPTJo5kf3J?d13&n%tLkXaS zP$DQXlmto&C4-VfDWH^4DkwFS21*O1gVIA8po~x^C^M7=$_izJvO_tboKP+(HrQ~)Xn6@m&wMWCWkF{n6H0xAiWf=WYWpt4Xos612wst8qrDnnJEs!%nk zI#dIy3DtsXLv^6KP(7$V)BtJ-HG&#LO`xVwGpIS#0%{4hf?7juAWx_*)DH53yrK4x z57Ytjg*rl=AV0_-3V;HkAgD7G423|UP!}i+>I#KJ5l}a%JJbW}3H5?{Lw%sWP(P?Y zGyob14T1(kL!hD1Flab50vZ{q|Gxf_9xXjadaU#~>G9GNq$f&GlAbI*MS80AH0kNm zGo)w!=|Are{rBHsmRx2_&yk)hJx_YR^aAOH(u<@QOD~aLD!oj4x%3L@mC~!ES4*#v zUMsy$dcE`p>5bBxq&G`%k=`o3O?tcZ4(XlJyQFtZ?~&dsy-#|-^a1IE(ubrEOCOOw zDt%1)xbz9>lhUW8PfMSXJ}Z4r`n>c7>5Gy5zh{`2pv%w|=qhv#x(?ldZbG-9+t3~8 zE_4sN4?Tb$LXV)w&=cq>^bC3qy?|aqub|h^8|W?c4tfuLfIdQ>pwG}3=qvOM`VReo zenP*X-;h%t{mA+Ua)w+WSI7;D0!4+QLD8WYP)sNm6dQ^I#f9QQ@u37zLMRcG7)k;q zg_1$Zp%hR`C>4}CQvZHEN+X?CI-PWS=?u~tr87xqmd+xbRXUq=cIh0_Ii+(+=Z@_E zJznL3@LWQ8hP!XsoR17K}sek_rN=TQKE+t)Bx{P#L>2lKLr7K8R zl&&OQS-OgJRq1Nd)un4l*Oaa$U0b@2bY1Cs()FbqNH>&jB;8oLiF8xxX41{2TS&K* zZYAAXx{b7_bX)0m(q7Wu((R>vq&rCaN_Uj*B<&~dFC8Eq7}@{(oC$(DL%~o86bf~L z!l15DI1~YOgStaKpq@}Ks5jIH>I?OQ`a=VtfzTjmFf;@j3JrsXLnEM(&?sm$GzJpqbDtXm+IjeLS2aJy&|3^nB?B(hH>*NiUXOBE3|4 zne=k$71ArES4ppyUL(C$dY$xo=?&5wr8h}$mfj-0ReGEBcIh3`JEeC?@0Q*py;pjl z^nU3B(g&pvNgtLzB7Ic)nDlY!6VfN8Pf4GaJ|lfr`keH6=?l^qr7uZemcAl=Rr;Fr zb?F<@H>Gb$-;V76JwD%o?n3vV`_KdEA@m4(3_XFKLeHS*&?|iJ>G=QYaae97+MDgi=ANp)^q1Nd5cuC!KV9=?u~tr87xqmd+xb zRXUq=cIh0_Ii+(+=a$YRombjjI-hiYX%FcF(gmdpNf(wbB3)Fvm~?UJ64E6j`+tvf zrJ&MK8K^8&4k`~-fGR?jpvq7cs47$qst(nFYC^T3+E5*+E>sVy4>f=qLXDutP!p&r z)C_7KseeBgT1dB)ZYAAXx{b7_bX)0m(q7Wu((R>vq&rCaN_Uj*B<&~dFC8EqC>-CMekbYJOy(*30eNDq`ABt2Mqi1bkD zVba5;M@Wy99wj|mdW`f~>2cEIr6)*Fl%6C#S$c}}ROxBb)1_xf&y=1eJzIK?^jztA z((|PkNH3ILB)wRAiS$zGWzx%~S4gjvUM0O+dX4m2>2=cUr8h`#l-?x0S$d1~R_Sfh 
z+og9%@08vpy<2*Z^j_(G()*2lH) zq$^2Rk*+3PL%No99qD?~4Wt`MH<51EGLHwhK;H^|8}#kacR=3>eHZlI(Dy*!3w6UKM4I0^uy4PKtBro81&=NPe4Bj{S@@m(9b|W3;i7Q^UyCqzX<&j^vlq%K)(w8 z8uaVXZ$Q5Z{TB4wx|5Fw?~vXly+?YV^a1HZ(nq9^NuQ8DC4ENvob(0hOVU@QuSwsK zz9oG}`kwRy=||E}q@PK@kbWioM*5xf2kB4JU!=cD|B(JAIed7fousWuTa(6+Qk25c zP`J->scI3Iq{gjOJJZE28Ggq|z!}=uYo+Pi{7!%Ch-8oUS?TsBl~N_uQX?Lzl{%@H z25FQgX_gjgm9a8T#>)ipN}KqkT{@&wx};ks$z+)#Q)QY=ml-lsX31=sBXeb*%$J4g zP-M}P*oI!qtv*2FXfn2NG@0gToQb2!(i>0gPwt$Z+&O0pJLj66vl2V!*_-4_p5#k` z6iSg4ONo?9nUqU~EcpMB^3{gnxK*G=z}4pUb$WePp+ej$((fWLrhi>HVil{Flz=NV zyk=l!#41tjvr6>_ajVSUE^XBcE2!QYvC8$fc$O>l28pxWGrD}S)8WWe|F5N2R$@4^ z&CD?~*UUUK^UW+Uv(U^UGmFhEF|*XnGBeA~tT40E%qlah&GeXAYi6C9^=3Ah*=S~y znayUZ*R!;jdL<2esb~ANmwMI{d#V4avzPiSPL5YH3~!aj?2`Jp5%^n^=kFa>guUGR?lca%tv;pt7tF%Wmhl#l9YPshiSd^6VdAB hN(@J;nQ3OGo9Q+)!^|!-qoKr+^~S=nNHi4k{s#j>---YL literal 0 HcmV?d00001 From c0d469718afef353ce7e091c1bfd6fc3d210a240 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Wed, 28 Oct 2015 21:19:59 +0100 Subject: [PATCH 07/50] Now sets local memory size in xgemv tuner properly --- src/tuning/xgemv.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tuning/xgemv.cc b/src/tuning/xgemv.cc index 675f0bf9..60c73d76 100644 --- a/src/tuning/xgemv.cc +++ b/src/tuning/xgemv.cc @@ -72,7 +72,10 @@ class TuneXgemv { tuner.AddConstraint(id, MultipleOfX, {"WPT"+std::to_string(V), "VW"+std::to_string(V)}); } } - static void SetLocalMemorySize(cltune::Tuner &, const size_t, const Arguments &) { } + static void SetLocalMemorySize(cltune::Tuner &tuner, const size_t id, const Arguments &args) { + auto LocalMemorySize = [args] (std::vector v) { return v[0]*GetBytes(args.precision); }; + tuner.SetLocalMemoryUsage(id, LocalMemorySize, {"WGS"+std::to_string(V)}); + } // Sets the base thread configuration static std::vector GlobalSize(const Arguments &args) { return {args.m}; } From 09c94b17cf9019e302e388765e0c0d5768b2a969 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Wed, 28 Oct 2015 21:20:42 
+0100 Subject: [PATCH 08/50] Added tuning data for Tesla K40 --- scripts/database/database.db | Bin 581880 -> 2093996 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/scripts/database/database.db b/scripts/database/database.db index 284369902f895180d0899149be37e99b5ca26bea..7c8ec445083669dec4e3352236e92b08f7ad141e 100644 GIT binary patch literal 2093996 zcmeF)>4Rn2RUP;glaXwUjPX3BXW;<{-~o@i%*aAQ4JrfOO-qGTAT?4+Atc*$+YRn+ zaJ$DpqyIfV`dfFty7%WIS7~!@#LFQvYvqSpwbovHpBv|$yY7n@kyXF_EuZ`9n{R*q z&ENR`&wb@E zU;4`De*F_~zWwH}z4f&Z-G1v6=eK?2mv6uQBTr5I;@h8p>)(FpGjG53Z$9&}cmDPE zJHAQ%DE*;-fBT&udE?tY|788!4}R|LZ+!Wyzqw!6m%s6)uYd8)uYK*!-~7;LzV_DV zzVh~OeEn{MxU5=FKmC^$Ty_e)x^=ypUgc>&a#Pu{XZ`^6$TX`#t-=eEIf|@Bi}l z?f1U%9iM;eUw`p)Z{7ZhH@hY#^`iMN0Hjqf_>zkciO&wu4>w}0jrzvI!PN5B4L)8G2i?Vo+)yY_$k z%&)%rL5_EW#~nIHeyCw}qv&wo?>mu~;U8{d=u zfAtHufANh!p6V}r>eIJ>>5V`7`{a-R!cW~k4&`IFfBB6M|31$ro-F<&w}0h5+K=7- z)u%c?@nqItd&>36C!Iff``6#2{n+i_c&hW0PdfkR8-L>Wt@)`R`SDNx+>dGe2?rec!C*@-_agr@r(jL-zgq+uuz7rQ5&r9{I_PU%dUh-%S3c+wXts zMflXu{lt%b?B_rBv!A^EdvAQ#@4L*O|B>69r-dKA{ed^W`}aFO{d1qV{lSzzdHX|8 zy_lCxc(RGNfB!w|Wo!PyQ(JTK{`^nB@smIKWNZH6d;FJu`A1J((vvgt!%w{o|LPZh z@b({v@pJsZamF?T@DPgSS7H(huGK_!}Sm>XT&o;%|KMD{tTa#4mo^lUwfVU;Of0w?FyT z*MH@WZ+rX64R-repZvz}{@m?Pf8m=4*w_Ez-#=39WB>m4Uw-5ZAN<0HzVL1sef&un zed7Q9?%(;slK}dn0Q#AC0_dMT9YBws1kk5{_}@QS|M>HP1Ha|KZ$0pGiI+d$zQdzS zN?iW@p&cH5=YbzQ@Iwdw;RFB3f!}rDA3gB95B%_ff9$~TIq;7k_`L`Ii39)Sfq&}2 zKYiezIq=UO_~#D%^9TNg1OMWIf9b%-2ma*)|H^@X^}xS&;9o!RZyfkH5B$i1-*@2O zI`D5F_;(Kcy9a*%fq(D7n*)E~z#lyDhYtMv2mXTt|KWlE=)fO7@E;%ePY(R22mZ4I z|M`Lc;=qp{xE=T-2ma`RKX%}cANUgo{^Wr_b>L4Q_^|{3<$)hR@MjME#DSkY@MjPF z)Peuifj@WP&mZ{d1K&9CUmf@h2ma!LzjWY#ec*p{;D3AIFCX~d9r&38KYQSt2mZ=| zpF8mL2flUSuO9eo2Y%teFCO^cANcDBe(At3ANclxUperr2i_j|wF7_Sz^@fe|z9>9{4{T_&*-_?+$dU|4#@1tph>t|9vNV|L^KlYvI^JCwKv(Jy0eSTi_`Y(FDe!sQv_}TBb`uglU@RyH`c+q**{eQXp z|FS=i=ZY z=kIyX@9zigJKWz7+V^4l{h*%1eFyIQoa*nX?fZA%-&1?u=kIyXf7NsLdGF8jp5Oi7 zcld+HM)&uG_l@Z93Ge&xGsn5;?+Nc4ao^t)KDP0teIE0we;(5JN&5-9@B5@r9S5+# 
z&%bZPi~fE7-#+&LKOFc!ANao<_`e?bI|u&mf&cNq|Lws4bm0Gf;Qw*p|9Rm5b>RPf z;Qw2y~2l{>Tz7Ico9Eh{OkNeYy|GxQu zwDQ zwto-pdcNnlpojAJ|F7MjYx}?R81TUZ`?=}95&hhB--olGn;ysKx#jlro`2V$pFFhd zd1$%T??d(z@)O5F`N;#H)%PK<`}@Ez9sBj=1N-xeeIxqcAKLe!|NWtTANt=P+V^4l z?+^Xd?C|Js9{939U+R6jzYx7o z_kHMny6;2p(|sTM_m=j3_`(j4p7+mdzWys;*#A%MpLpw+Z$J5wr~WtWKl|i=!~XH_ zd-6YG|J}>~i2YankJvvo{*T!I^LP2*oIl?7>o2~XeYiRHzqs%Ji{6Lr*#Gw{_P&2P z_q~08(dWJEJ)JY}*Zbbr{hxnp-;ZMV;j#lg2R#Qp2fYuP1I>ZvKy#ou&>UzEGzXdk z&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av11~2B{9b8s z-~SiA5B>execxZ!KKQ-X;{H0k=zVDJznt9fzIWd*JJ55`bI^0p`=B|{9B2+S2bu%T zf#yJSpgGVSXbv<79$F5(=+Ec7-kyW@;bnbJ-*xxb;YEKw-}Ro}_lK5;y`KH`={|HH zE<4b3&~wmpaQ;5}|I6BkKCk!v?>@YoeR$D%-}Uy_ z**?6i-0!+CH}_xG^X8$g+uVOSx!-;7zF&5r=b-1H=b-mNbD%lU9B2+S2bu%Tf#yJS zpgGVSXbv<7ngh*&=0J0xInW$v4m1av1NX@Rzwhv}@MZtL!^{4;`^(yQ>vsS5+^`=n z8ejJQzv!Y&q42l=0J0x zInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7 zngh*&mz4uA`u9h3z5YJwo`1P7FN$~nyZ@IR*tUHiUNpX}-0Qmg>)n0mK3sO7=b-1H z=b-mNbD%lU9B2+S2bu%Tf#yJSpgGVSXbwD-9C%Tmi%;wH^YT3pr~P@}diTHkf7yX; z+xOu`<3q{8UdR6WcOSYBmmTOi=sD;)xchzZyx*^_=J);Fe&4S4>3P+=|K0z~4s6@L z4=)<~+}`)S`|xu1;d$li*}2W%{dGUPU(edt{qO!?c3|7~eR$E>+~4=T`_O&3>_E>! 
z&q2>Y?}O$*bD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1Z|g&g?3 zkM6J2qix3zAL#cPTJQe6vjfMu==s>sL+_7$ADR#QJ~Th~eP}+sTKUlTsr&2Ieds=1 zcA)2==b-1H_d#=@InW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%T zf#yJSpgGVSXbv<7ngh*&=D^F!f&M=3o+Y{_ z_o4f6*@2#eo`ar)-UrQr=0J0xInW$v4m1av1I>ZvKy#ou@KAE#d4C?+`Fq~==OOz( z^yeY_KJ@1y`#wCB9PD-U^|Y&q42l`{qF3@9z20_q+Q(^!@I>4}HJ8@5A%X zhx_JM`}WtT`*7VpJn!?}`TOhB=j*-?eZKDd(C6!Qxz*h7{#|yU=b-1H=b-mNbD%lU z9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>XKp96jmWJ13mF`?fZ zn9$GDC!E%$PwUdBb?MW(bn8rLoe8Zop>-yl)}>GD(x-Ll)4Fu)OlX}6tuvu@CY;u# zPwUdBb?MW(bn8rLoe8Zop>-yl)}>GD(x-Ll)4Fu)OlX}6tuvu@CY;u#PwUdBb?MW( zbn8rLoe8Zop>-yl)}>GD(x-Ll)4Fu)OlX}6tuvu@CY;u#PwUdBb?MW(bn8rLoe8Zo zp>-yl)}>GD(x-Ll)4KG&@9z8VzVCkV@4I{7^}g$U*ZZzH&>UzEGzXdk&4K1XbD%lU z9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKyx5-!0!!C==TOE z^m~I7`n|ykr*-Mmy7XyX`m`?HIulxFLhDRuoe8IP>C?LOXC?LOXUzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#$&bHwUKADe2Sal=SIyO8WFUCEYp` zT4zG*OlX}6r*-Mmy7XyX`m`?HIulxFLhDRuoe8IP>C?LOXC?LOX#JbHZs|`m`>6T9-bpOSjI1)|t>c6Iy4&X6T9-bpOSjI1)|t>c6Iy4&X-yl z)}>GD(x-Ll)4Fu)OlX}6tuvu@CY;u#PwUdBb?MW(bn8rLoe8Zop>-yl)}>GD(x-Ll z)4KFN|N8uUna{tTzn;IIzkZ(C9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v z4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*& zmy-iG+rBQ`2A(=U%=*Mr&;G1WoSvBTRy_MBZVu$b6SI!P`}*}mb8tw9^c7Bi`dOUT z(Vz9yS3LXi?4v_(V_koPha}gp?YH0ahQHQ>l05s`?EfAdScF7@$8?tIgk%e z%sLM5>(>v>!66;eS2+3UXK`9bf7Vl9@$AR5j}Fx#eZt8{hx%7Med$++>U|&ca^A;u z-@EUZ9q2jeIp{g)eb5|e4m1av1I>Zv!0VU;zTfoyC!{B;Lv?(G>7O_~(fmnQhv~!B z6SJN?{rBbOLUSfgPt-SI`tiw6=Yy%^iIYF|sc-U4x;a_LC)Ah7pE^F(>5zYg{E6Au z=TDzMujBKl=ep;*=epltH3yml&4K2?lmp%$dtaQKo|wMm{AcyEK69bI#H`~hG$(zj zXUxgzVs)zPU?6fKg>El)#-^-AK$9Z z2l*i#PP#hegLF9Q>Sxg$n0<6O)mQq|uTQxWsNI34PTspBwxbf`YzS)cxuUp;Z^=TnDtNWU-UoaE-x6V+d}T#Wwx`ZS;KESdweFLfMF{;8gR zoFCFB3ooX#ra_RQ_ngM(?9W*KGky_z7 z9nur0Iz3VUq_67w(s!2g!JJ2jS-&rz`sfq7j=Q3H`fx~x^h9;2KJnDkuMSsrKA8UG zd{BLb{1fU|Uva)feQ;IhgXurZSN*Gcu9JQorhnr6iTsnU4*BrAqJEhD^u+YxeE3~a 
zKg|9~=TnFD6{a7D_wAqhR&&*%IXGPDS;rG|p8BL`oe!!*Iy|eV4~MIH>WTVc)^T-6 zUt#)jnEmt#`PC;*Ka1u;eK<^g;%9Yp&*C&U`_KBaZ{p^`$wyD$if7;Cr$c=(bsSDU zI-LA;$e%dX=__2#%YJ@34kumxES{ai2hGPJJu&Oa`6pd{LVbzpU)9q;adXs@(^ok4 zO}csNFm)VGzW)59KmX{@Ki>WMNAHK;54|6HKQsrL1I>ZvKy#ou&>UzEGzXdk&4K1X zbD%lU9JqfDJlgj210NqeG3z*dUjCf>thwtyi_`jN`_pG09Z$S#J^d@5>m{H1>32o* z^-cV&ZVuE3>5x7lA57n*r!Tp_L_T_QI#f^0Iu4(gKj%Jc?)uN-wEo%t^qEJ;6YpA2 z|BB~&$)|q$UD14f6F;k)1NA{Vq)*5P(>LkqORg`GkDk2mNB8#=-G}bOWe0i=dJcLH zdLJ|gngh*&=0J0xInW%qJ_qvqPv3{atWTW2!pGZwZuxLXPgIAiI$xr`NzZz6ee}ev zuXy(3`X;=qj~|+gLpn?yPn`VpvuG|qp17*>rw^ZS)u;Zv&~@efKJWUx>+|mV=Uwlk z-bcNUdLK0hngh*&S0@L&AI|s3_*pau>ciortHbn7JoW5XpD_I^o^w|ItY`nkpH-hZ ziPlYh;{1>gPgI{U{doH6iF}j3s%Ia5*S^VbUZ0139`O2p7zxIB8Ki;pNSDt4R z@+GEUJ#{?m$@wPaU*W1>eVU^VS3dRhPkhp+IjP^ZpMQnZI{LDX!?QkqIQ7vX|Adn- zb$r6d+n$FoeJg#c-*--*PklbUAD>S>_dWML_x(PqInW$v4m1av1I>ZvKy#ou&>UzE zGzXdk&4K1XbKu$>m_Dzc^!ywj=ZAOI@7ia6qB&35rK`j2qeFF=I-bZ6)hAAe{E6yN9iNbILVefPS98qggQ+KBAGJ z{?wD3Lx*%oUt#+3yW+L$=(jEo>5vZTke;ZXIQi(4f6~wPr4NUXBZufv9nxXy6QA^H z4t+xN64Re`oDYYSuAVsg>52O3c;ZxF=~++y_;|gcI=ri{?=0qg9GY{M^W7D#1NGyO zK4JQBs1KhoeK^#IC#ol&^<`i3oP+b>aMH7$zT}gC@=bnvV$Q)KKMp5-s`Ep>#HmhC z)So)8J|Q3E!zVoJTlJd*CqI4i(N{Rl(WjnxwtlTI`|%Y%9-iZqp7Z#iIu29EVfs>6 zhw3nO9Hx(+$d{PD7_G<@yu#)9=gmLGvJe!t~)#9}ek|zCwP;e^;&#>YtErLVfDVSGxLyIS)^q`sw%e z>xbqff7beHo;rNAc#iR{&>X&rr+(J24`yHL>QH?`KA65q=Tl!HKTLn>_=NhWdg|%N zVfvG=eCkj?o~YiRw>*01PxqnwaM^*LgPwz)gWdE-`B!b-{i|aKI`HWPW`Eym&gaRj;lZ3_WN?;{Ctp}eRR0;pRK14-{Sgl z>%-LbtK*64`m>&#PhXZ8M{ zFYA1XE8n*5Ij4^wrp~XP$e%d*Qs0j2f#&exiCM?Z!S&&g4(V_^t_z2guHNV9e!jX7 z-G|E#^c?gY^c+0A`(X9lQ=gu9>YjTz)HflYe#nof59foEpU$U0aaCXW^;<7dKb=33 z4`y9IysM8t`|)kt^NJ5ooa*MlsZSlIFF7ByJ`T6z`fW~iU&>TE5 z>v+z?A)k44Jdqz}J^S(WC+9N{hy0K)G3)yI@Uy7TTzxpC!?X8ueSQA-`Ty{q|L)_w z|8bZ;`nK)oIyqfma(;D4huOym`4aW5F#D40pVp)EP5pE}s6XpC4|r>8$TA5_mdQ$2mSIZ!`Led2VeKXDuOSzjW5>Jzt~ zK78`?sq?2k)%g?o6Zxk3spBx`(Gwp>9>U12^6ZO;i z>9{%_rjEn(<-FwjVD{7XsYCTdewcMU=dAqd`V!U6hjd7XbV%PKAMnYi5Ax|t9fy-I 
zb^e^sM_-{ikRLY>rat-k)b%G%UvhqEPWn>Mdh(S&*W-s-$D#G0KFA08a6YJxr!V`G2UEw-&OJLP=T7taAYby-UB^o2Tg@{k{n@9^ zub!AbemK>0&RM^CQ@=Ut$?4`mK0JNN`BPu@^Xun_biPD&e#obeLq2$4J?ER3y7}}( zb!ZL_C*3?g9O}dQ=rDC0@|lyU&VLqjPWGvr5BaiAhx~l>M0I}k73WKw=J916SD(=J z@I(Hr)9>rkZ|=m+PvocP96H}!>-tt`9k}wV^Czl9_3e004*A~n*-HxeK70kn|Su&=0ZB8-xVLn`>^a+hq)eIpE@0?CtvxedVdb{=$${^ zhwj5=2YL>A4tfrHA2bJ=1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9JqfD_}qGh)92Mw zAAf#6KKal3vR@xmw>}Q}R+xS~aohI$FZI=Y_3Xo^x$0BDK6NhpFRmi+$|-(MRw6={|HHE<4b3&~wmp(EFe{&>UzEGzXdk&4K2?^*P}CQ<&e6 z;_CT*>Z-1fFVVcSI$zE;2dd+c4pV=;?e~>>qB>N^;r5K?n3rhXL_Yd-9efi`{pmM9 zb=;hZPyJKhq?UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#$&39Pl~3&)p~Fo6zU@aPm$1w(a+= zdd|VqH~Hy_=0J5kQ9UtzTU>wov#y_xL;ZN7Iy498$Kj-_CvL~rb+(Uhh1TJlI6ZNT z>%!A#ej?wbPj&0)*g~j z>p0{~G$(y{*6HwUJ$-nhxru!A?br`oosO&1@vPIKdZIea`gU9wR8LNa)4WN~K0fQf z$w%kYKOukCQ^(E4r@mD^`^=@^wSV%Pt3IK1SD5|$sarSecp`t!!TE7W&wlz{G3Vqu z6Hnc`^hE2V4~J79oe!p-d>isbKg_;~(_!|fo^>4Z!_pzn;%D_-XN!H!dd{8v z**E!eo<5lMoTCo)t#IYfdai3epE{&N{=`*1{pQoBem=;D>z|N6G5e=FeKlX5Kj*7M zKA8I1etrDVT*!w*I@FKz(ebR~aPq59xSFpHbB=XW=Z8~0^}EjFw@#w@^vR#TiKo7r zYdw8XU-InM$OWmBr$(K3~a}KV)#q&*l($y2qgX%d) zeL{We6JP1p+)v9J6%KTLh{r!V{UO}hFtM_oV8k3+t*c(#uZPU~lV zx(+^d9OnG=o#lLI(fUvy4(W;8aNnlS9GHE{`OHbq53`O#b0*|7kB^Q+`s7!K*@x>- zpE`ZQsh>aRBu}3@REKmp=?~>|=U&I7cm8xAx(}Bf=sD;)=sDUzEGzXdk&4K1X zbD%lU9B2+S2bu%Tf#$%gm;;~M_SfyR2PUV(tos~$i#h36pZfVBpFWs+^6XD;PU^|| z%)g1RkDfjpem3@1A05)6ejM_jMSbS!OC8Vp&2jzu`4Y2lRnNKRo1>0HbCc7z*l(Oa zQ6J=AVa~zz;pRd*yovoi+ds`SH|x0$ALO?_uC5=4eEMMO$+JJXIjJY-Gymq;&y`Of zOuu<_NQe4y$bS~~nWryxJnJ`cj`$L@Z&lB^d41~ULUWVT%}LIm$OrYWFz4XA4tfrHA2bJ=1I>ZvKy#ou&>Xlv2Yg@Z`&Hkc()0UM zJdq!&W~gopEw;({i&xv>p0|x+qU1&tNK}=x#q!}V;yrKT_0T^ z9apC(^1)R-eG{kO#D2LR$Oq~0v(e9&$Pd-^Cr))b)R&x|*yr7Te{>(Z50@S2Ip{g) zIp}@R9B2+S2VVainBFIE+kXDNU%okb`uWsx$Pf9vaTtjV&|GzWke--*>iX0nJy9PY zKKayP_9Zump8RIp?|<{uaY%>Rmp*)p`E+w|K1g3-&RKEuA^(Kd=f|Nw{O0(2`Ehkf z=c7Y&`EYf8P#x!kbV#4jJjjQ`K0o*Sr~A--xa>gBLC-UzEGzXdk z&4K1XbD%lU9B2+S2bu%TfwMVqv+egwe*T`^=kZYeY(GETj&-5AI3JwWq3ch7>gsg3 
z($%59?f5!y^_%$m_+a|=spIDGt3!2uzKQGO)1P|wtHZ3DM~CL)kUvpfAEd+Vn|S(h zxE-$(ZVrA|)Sq*6jygX~AN}n8T;HR2{&XL@50@QyDCgj2+h32>eeZrxlnYZoozHsa zsMB%Cr;eKkCtW=;{rJtXA29opZ--yMdUAUD`6lGoze0YPb8n98HHQyZ*N5|^t`DlG zpU$U_L%y51PCDdI+=jlhx_RlRPuDfo=`gP!-{QJg>*PHB=BY#V#PqA{gM5&lxE)`| z&4K*%^e5-j2PYrD^>BS~>Q5b?`m#Rxw)pxdA06Hd=Rn=uypD;dt{-OII`qU<-CRCM zhc~f5t98ub!(q;|4nO42`lR!zLpuCyyl#9qq4TTbiR$L6PkdTOpLxmYE6jEIZ{q9Y zQ%_Wf+t9DBfAXo{9CP^S=0G|heT6H(I-KTNe?oogH^pw*^<{hv~!9Kdr|P&E;R|>BDpVoSWP_`V+IB z{S&88Xq}tbZ+`XU^t>({@Nwnv>*7!3yH=n2b3P9By(qpN`*9Pm7d@}XbtUpa zbsVnrv-R}hH}Uo4x_oqg`h?ps*Bodbf9g{`*Uf%&;L1O(v(n8^)Ms6Fe8Mfxl|DK? zp}99F_upK7(_DJ`aA@81tHUj>kDh&esZTt0b0<9O=Yu)Vb<+7DJ$>}_;pxZK^{0+Q zK7H^e&KsXTKB$iCPtFI;gOguf-$VJ_x!3XNoj=`&?!#pVdJcLHdJcLYGzXdk&4K1X zbD%lU9B2+S2bu%Tf#yJSpgHg==D_1^f8C(Zw|y>6pZKJk1Jg%0KQZh2=@asq$2W0) zKAay{r{j<>k)O^#Az${Tt`5yhPEVZX@tKpTFMYVWzT|w64~P07A3h;pBLAdYSDl}Z ztEY~u!klWq=7AN_IU1fF&M^a=TG)Kq^Fw`!{B-^a`LZu{ zb!c94dg3&X&zwYk>BH6aCFg^DIMfIE@Co@6`6u1F>il$EJ#}0iu5|TjzIxvW?(ZkM z58a2$4)h%K9P}LYK4=a!2bu%Tf#yJSpgC}T4&?W(zW?<7X>$Io)A{lAWgX|sJ{0fdEkbgox z_;}m%Hu+gS`*PiBj=nzc`n>D&?)v9l@1x#Fy^neyH3yml&4E`Z2lD+g?)|a%&vZQN z6W0gzuaMtds84<3^z6H&wlj@&DRI%`jV%vkB(;_4*4M+ z=A7i&mz;k>es$~8CqC&@AKf~A9`NxM-=(fj=c7Yhy`z_3`8CF!kho zP<=waZP-tB$T!WK>iYFfK01G5_D_C#pHKUJ(tYSYTy~)6py#0Hp!Y#@pgGVSXbv<7 zngh*&=0J0xInW$v4m1av1I>Zvz}Xz|dHm{gdiBZg^LlldeSA=T;^sg;9OfK8n00*< z>dX2xC;iFwC8p1MeCoJ5eTCVFullUR5B1@EE6hIqbp5zG9Zq#R)DQLJXHj3GbrL6E zu9tmy`pi3fKiAjif1m#k@A>aOPISN1@on4B@xRFLhiUuIB0E(?_3>Umb^ZIL%EzAIyG!bsVOj52{bdXC9uYZcf%$ zy1F^~>3E`g_D#N(ZXTbxsV7fA&X;H|Oh2xksGf7_{5W6o^r_SJO~|JY^5c-cMQ&t0 zeN&x2Vb0lx9MrE4v!4(0;rz+ziRm+s4^PxL>FUtD$@x~OFMT;Ted_wuVb*c=vwf4_JaZFWCN3C&TT zxOo$1AD_86On>^+p*}u1>FO|jc;dEr-kQg!pN>O1q-Q^!FY7p*eC8zbLvtV<>c{!$ z$?1vNuaC};Lps#A;;a6w=UnwfzSQ%2vYyj-Y3&x z&Y|nW)0cG|^5Zb)@g?%WBO| zU+T&ER;Z6J`F8UB>f^_AZr1rGT|MW}`*WB_@BHaLbRRA|&~wmp&~woHpgGVSXbv<7 zngh*&=0J0xInW$v4m1av1I>Zv!2NT;=g`pS*0|55Crlp>^(D7X)>EJAIgc;zgorIMkQiI$2MBs^>hu>`%@I)y>1FI-feE>&Mj-vySWM 
zPaU7?sq;hi#Hqf;`JDQ)AD{Z^kRMt{9Z%$+kU#5ms6O#6_JKc9KR=}Nr>+k9VBe4K z&x`Iu_u;YwJqJApJqNuHngh*&=0J0xInW$v4qTrD`TZvjeLsrBZQIY^ROd_7m->q5 zI?1R0l}{a-Gx6>Cy7?ggR8O5h(LAV*C#u7&Pkf8lZ`Ggv6+gS~wKx> zkS|dks^f|3FzYLxefQ=1;I{4gfai6jj<5R6;m6P7s!u)9{7F}zF#EP+fB4l`eC5-h zxDBsI>gGdp_*OjolIy4UIlA8`-G}bOWe0i=dJcLH9^QT6`RBQn&#_5QAD*ZmZrgqz z@FnLIsZKYCA7&kgd~l2XqVs1xdHV50^HQHU{}$JWt55x@ z^F#H!a(!D|mpO_2bbLZ{`5}LD`U+QleSSWA=TG;c`*7KTo`ar)o`c>8&4K1XbD%lU z9B2+S2bu%Tf#yJSpgGVSXbv<7ngeHZVDXVNSx8u4YJ?G-;Fm)WJkDkakA-{U^E%skMQGX&I9fx-^~tY~5As7g)R(xbPyX!NB8RgN*9RwE z-CW2I`6irvbhzs0a~+dE^{FrC@abRi$w!Adhn~1y`TAK0=6u{bILyBEWj%S$QHSdA zzVr3NoI_8Xt|NW;HoP9{=0X0fr=E43&pfCO`EWS-)gd2DeZ}wFH_fFdZn6Ja&+Aav zx8nLI)W1Ugd@Ife`5`?~9jfEov0rqkjwfzIA7A$4`f&bh(cEc{dagU^=J3OnuAZoW zTkP9Z*KaNk>51y*;(R!y!?QXc)StKwUw`(cPo0h@=6dS%@$?LZs%sKc9^;?&J#q}lXPdz!`g#1%|8_qv}_T!2CiG0&K>iUw? zC*)7$r{jrtt@9_EOHaPVdCWQvr#b2{ee^9}S3bxGSGqdXm#7Zax8r&sJy9QDB0nDv zQ%}x!E#}v;6epYB8V z;j#lg2R#Qp2fYuP1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9C#IT;IrHQx_#)YW^OrJS?S;tpsPGa`0>ipC7WIrF&e-mH7K6S{4L;kaWSt|de-rrOV2qs$G&ANp=APs}=Q4%CMyPIdFt z6U~|QtmEePecUzEGzXdk&4Fi?1HSLP+4lQt`hGQa zTtD>vE6%q<{=}Q(y68E_dQd(4^h5qc{d{<0)^FlE&7JzPp8YvL{W$#Wu|7WaTZcb6 zJ^ShAPn-_Tv5tPIj%S~G>ip_CKmC04#Hmit_2@Z=FLhj9KU9Zw_^j^t&e?OW`_O&3 z>_E>!&q2>Y?}O$*bD%l!aC5->U^u;xR?qj*{PaY1{WsfwKhXKrQ_niimwh>3ovuI8 zJbH3ER5zb~7WF}WIJC~qv2S#Km~~wLs*le+NY8on$w!CQ*O&9uCqG?3Kdw$s%=*O5 z)1N*%-_7v4sQ3A|-|yXr?!#pVdJcLHdJZ1mec(CgxdrLa^A7hsOq_h?CYlFx9(}@X z+wUuNJg;NL^_c^w>oE`Nhx*iUb7A`Nsm~n9kDCkCAs^&RRL{Qj;rx((GrT^0aPq4| zzU`O`>3lHje2@?FLpnd4^sL_;*E#v=F#GX7Kll5*`_O&3>_E>!&q2>Y?}O$*bD%lU z9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&vpL{%cAvw;ZQJi7b$D~=kUvpStQ9qpOIhRjg*69;wKc1-He8{JsoKHPD|IKk8aDGVFkE_!sG@n2F z>4`Z{J#{?mEABe7pPtuk9@NJV)svsSpX=LmtNYM>xa`0~IR|+^-)#HqL&xQU9Du3o zqfcBvOrL)BM0I^rJ$3#G(~t9Su|Mii9ZyuB=Fzh+`OR@1spC0k^7Fy%msVC3*>U11VdS1^q?5BR1bLolc zGavHf{E*H^hbuknI9$y+Ti=d-yNTD^I*I%+>(*6Ip8lKRb)&=V!_DWzSGXPP@|j16 z>W~kgkPlA2oA~;-;kwMpI<9UV9Ma*iFcN^R3W2kRONU;&6-o<4YY+ zv>wbluKwBBM|{HUORjG_Irrx94j<1uReYkp} 
zIea)Ded6@&qeJy+-p%lO>DQM&dd^R7PNM$Qlcx{2&g56;Pam!hZ(=_tA3t36nX?`9 z`Jp+9{B+zrm~-&-o1>1y^e49-ed(>YAkglH&>5vZTE8HRv^xwpJ z$~jrr2h*S2x=>v|Ont>~;<}UT-wq$7TbB;i?}|AOhtvE?*9X%_hpzYL_UzEyskO0-`$TM{oY6Y+m3hTx?fb^UDx4% zU0=t3UHb{_K6D>0JJ55`bI^0p`=B|{9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0x zInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgHhr=fHmT z*Z+MNf1l*qc|A|B_Ic{-*Vpf|1FLhe@6yA`o#%Z$tNmzwe+9Y^-G|E#^c?gY^c?g) zXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdka}MmMZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S z2bu%Tf#yJSpgGVSXbv<7ngh*&=D>^0fydjv-`C>JG3T!MqSx*HwZGoohwj5=2YL>A z4tfrHA2bJ=1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7 zngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk4>1Q`^xrqxw*4GT*!!gS$wRzP zdM=;sxx8!cto|Nd=E+^>r+>9x)^q*jJwMO({B%FNpM6gB`>N(ZbD%lU9B2+S2bu%T zf#yJSpgGVSXbv<7ngh*&=0J0xInW$ZvKy#ou&>UzEGzXdk4=o3N@1y&D2_J1chaWyTe0*@|_e%9c{ZK#D&ky+_Khy{H zL48mkd}#aM>)Fp+_o4f6*@2#eo`ar)-UrQr=0J0xInW$v4m1av1I>ZvKy#ou&>UzE zGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>Zv zz-0&gJrU^di9ml(1U}yOeS`X-KBy1!K|a`Xx$kfHq5E*zf!FmMxc{O1AIb~J2l*g< zLOy6Ne7x=F4dxtuP#;|RU)R^MU)O$ax)0rl%MSD$^c?gY^gd_~GzXdk&4K1XbD%lU z9B2+S2bu%Tf#yJSpgGVSXbv<7UPKP8elF$bSJ2PD;9dJy{+|1u`xkNUd(L{!dd~Vj zt2xjdXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S z2bu%TfmbO9{Jz%fhBw=O4qrFij<4&X;Olm7d_Aw*x$*Tplyl=c`n=q8s{7D=xa>gB zLC-UzEGzXdk&4K1XbD%lU9B2+S2bu%Tfftnnejf6=p`VAmZs_MK zuN%Io^WJs(eC_kK&)4QabD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v z4m1b;pd5JJes1dPdEI_)>g##kes1dPX-+k#{-B)d`RMuR`RM!E=0J0xInW$J+xheLyehff>+Ji?zR&FY%;rFIpgGVSXbv<7 zngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJS zpgGVSXbv<7ngh*&=0J0xInW$<-Z|j!$6Sm4KFYP|?|)p2*M2?vul;)TU;FjwcU{+_ z>$(=Ny$=1?UWfi|+n*cPqU*XAUDvgE?boCK+OJ3dwO^0^=bdkz-}g~{AJz9!&4K1X zbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J1cy*c3X{%g_a{nz5Q?dSAby!PwS zf9=Zv zKy#ou&>UzEGzXdk&4K1XbD%l!;&b5IpTp_D_UCZ=ul+fke%Ey^x~^-{bzO_sem(lH z{d)9Y`}OE={x|=d|1UoOd*AiG>wVYzt~t;gXbwF49PoboTJ(PVTJ(PVTDsq|_>(PJh*Q5X1uSb91 
zXY_qW-)B7g_ZhvfdSCUv>V4H5Xbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzE zGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW${%zixk z@$AR-CF)Dmmze!{_T$-)>r2#^s4p@5@$AR5AJ>nHkLKQa69?8mbo*O#a-QD0*AZvKy#ou&>UzEGzXdk z&4K1XbD%lU9B2+S2bu$aP!8nJz3}|G7oI=&!u2KUOVpQ`{do4{*^lc>)R(9)G5hiC z$Fm>Tm#8mMUt;#-*^g&Gt}jtvqQ1oJ$Fm>Le!Mx=oce=us^_EUqvxaVXPX1ff#yJS z;8n>1?}rn;A5QdsI5GS2?8mbo*O#a-QD0*A{%zixk@$AR-CF)Dm zmze!{_T$-)>r2#^s4p@5@$AR5AAePHyVu$GnSGzx_nFOs=0J0xInW$v4m1av1I>Zv zKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v z4m1av1I>ZvKy#ou@Vs*%|9*zQe^GtUpYB8V;j#n0FZOfrtoPx0=UeCRuUq$_`*7KT zo`ar)o`c>8&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngep6KmXeEp!@K0_rZBA z_W9K3Q=d=Gf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEyv!Wv z&kgtU*L`@o`_LS04!+DB?D^~Y>-p>Z@#a8tpgGVScvd;k?<@8^={|HHE<5n7|NAUG zSFhH&dRDpGIs5D0eds=1cA)2==b-1H_d#=@InW$v4m1av1I>ZvKy#ou&>UzEGzXdk z&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou z&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#!f5*zbl%kN(SlkAV=V4wWZPhv}bmzPqA% ziRP^I^j*uXH_f51Fz2K{>&erPrw@neOIm-`9($jY>x85{|zQUZ7{;VfYKb}4urZ08%M83qk`t<9Y zc~L>dSg^tI#J*--_oP{t45cTpt~WbV!G(C+BBkfGr=EP}Q%~%3Zr{o7L-*mb13d>l2R#S951Iqbf#$&L zp9A|TfA8-Xd7rEfQ^ylme)S1+9zOZ$e2MB%9fzqWzppRn;BcBtPt-Rd|5T?>K6+yI zsi%%ZK1hdm)w3`AamWu-UvYhj`X=mibl;ioL-*mb13d>l2R#Q5?>^X1<-7l!o^$kf zo%_F~o}54Rj}Ehrt3x`Z-xtkE%z0<) zXZy^X{B%Az>FSC6iG1`G@AGrtq3%QX;j#lg2R#Qp2fYuP1I>ZvKy#ou&>UzEGzXdk z&4K1XbD%lU9B2+S2hQez&*ATj``tGGbDEnSe5vDEU-4C+e#j5$aMGvx<*AMl>Nmqw_aMGtbf8yj{>FSB* zPr5qfn>c;K?B|v*C*dUE<%yl?;1 zM~By5N6x{a`N`=}9j1=2F#GvaKg;z{mdXdQgQl}{b! 
zoYe6NvmfWjC!Bn#uekZ?qrrSn7)bY%lf2GzO(-9Pi_uW#~~fk6Ib>04`qCPUM?(^}C{Z`fx};i~3;prH(^>{8?kpPoC?k!>LYB9}d$;hw2li4~N-D zKZ~pWRoyygeUpFHpLIOv;fefEJvlv*FOhH3)$fYt>BAxYEb4>VmpTsl@n?-WKY6aF z4yQUjeK<@X9jZ^5J{)Er{VcBfS9R;0^-ca&f7bDwhbQvGz7O1YsQb`;xa>gBLC-UzEGzXdk*XMxmH{pJ&-uut9^_4Ft^F#F&KkJ+3q<+_V**|f5;@S1m zmvipR%}q3qKJl}Khe2Hi4e2IPD?K|3i=ssL_py#0H zpy#0XL35xv&>VREb6`K^@BV&KzCX_T#8b~Xc=~Y2m#Ch|mpZPVsGd0aQpfe5<&!UU zbD{cKocid{you8%%znN}SBI02o~RG1;}h~hzT|W`)#-`)V4tJ=PIMo-50@S2Ip{g) zIe2*YK|aUu{j|RK{UIIlUyFC0$3J0S58qimeYiQ0J|Q1W-%3|cJUeIdnWv6JI;1CN z9Z$@@ZvKy#ou&>UzEGzZS+fX~_Ce)qihpX!j~EayvH%}M{n%}L}>9am3O zzbodPiRWBC`n7yD$9%{S=@arzIQ6BjpPqcuS3Y%^bLcSZ_=Nf(AHKrLPlx85y`Ssb zcdYx+eYotvLpcZT=Lz>$>)qcwIm`LrYJU1~s6RO!PW9BU<+QFsV zJ#pom>hzq4XFWL|T-DRZ57j4}e3NcY)+e5N_N&9JC+AD#gIUM#T0iSw`K+Hf_0eJW z;aQ(J9cDj0F?~4XPoBE^S=0~pC8wXoXWf5x9v{5x_3DHACr(eC`sioTTz(wVC**@G zpE^IJ!@KI)m;K2nfBNwIqIFi7^ZDs3&Ii*^hw2mZCGw}9eDd8_*9UXGvpoCp33D#r zq)&Bzb(lI1`5-+}eL{ZqyYlSQ57p12z7?9sx8mvJhf{sh^{p`bak%Qw`qY;?4$Z+Q zoP6{|bLh!e`l_DuaH#(*W*?qt9vz2tIO*!}tZ(v9{q#h0;8drl4?m0M+!w7g^=BQ2 zXZt1}-MmEgyP`SzlAqP}C1$_+N>_*3M~7L*;k7>fiRMq3{^V01edSl@hx7^0`p)*B zoj3VX$5)uw$DjJdC;v*%`o!<6>q}gH9sGRw3e%6n?5D%5PdxoN)Q3ZQ;#8+kXwIt6 z2k-0G|4=@6?se?1NcW-paM^*LgPwz)gWdx0ugIy~!7-(CCDZyucLbjT0skPhi9&f|`dZKz_`jYe0&vHJfAEu5&K1feghgna~2i5P(^}%T#9iH{~ zec--h-G}bOWe0i=dJcLHdLJ|gngh*&=0J0xInW%qJ_q(w^xpsO!=(HE74pH<@fA+} zbZE}RpOrqXGwIe{q5cW)+RvZ!=ujPp^s|_K$)`Ddcg38ipN>O1q_2=4@?XnyPR>(@ zS;zal+jpq@(0#b}V@~QJfI;2B7OdY=#bN*@`Kjgobo3ldeoJI5a`yAbOr2Ei) zxa>gBLC-o~mDr$2EupFe%6t0$%ppHRPg z^3>Jgs?L|FFLC8l&puoorv9vXt~1T2CuTnm)4$^S`uyB?qWjQ&xa>gBLC-UzEGzXdk&4K1XbD%lU9B2+S2bu%TfwMWV-!1R`r_bkAl21Ax|fRS;j{9aH=%W4`cj`b9Zqx9;aML)%)W`Q{8`80G?za4CS4!ogLFuTlYZCw zedp+dxo+y}FzXYi!|YFeRp*D3Umd3JzC8QPpO8Os@=y9*`}xgFPM`NWLE1Iis#rf`v<|dj$$Kj-Boi8zc6Q@ISlG79U;MqEV;?$oyKJ~A3 z{qU~->^sZ(pmlLL=~?H4>WR;*ZZ5p*y8Qa#q)&Bz$Ol)tI$ZUw>NzL<>M-lc`QWO~ zcVEo;$<3w1)NzgtJniF|Y%((j6^{;c1XPuFA4g!<0b zC;v*H>eITZ>xb$O<#Xp=$Nq|RAG!~h9q2jeIp{g)eb5|e4m1av1I>ZvKy#ou&>UzE 
zGzXdk&4K2?tC#~me}?<3xc>cOKB)h`xSEr7JTcdw>U4fQQ5{ZoIv=D@$OqFm>3mb2 zo~RG1I&2Xh`BuIhYnhwj5=2YL>A4tfrHA2bJ=1I>ZvKy#ou&>Xlv2YmlI;eN{2pX2K1hm)>; z7N7MT^Pua%Vd}}#r#@l&@d_E>!&q2?@!@CdmQ~B=CBl0;marz3)hv}!otS9G#Q=QHS>2T83Az$KDrzh%9J^AFL zC$9R_pIkpZ`K0r~^re2bo<1DrTs$%T>h#2{0JJ55` zbI^0p`=B|{9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J1cY!3K5e!pwp{b%YpeAl`E zTk6UA>4|r(^F#BJ)1msaLUZAL=jw-N=kUW-U;2{kr^D2f-_^$t&4Vjl9cCXLs>9SL zPCtv&{FSZ_bG@_obA9^`b|1PAmmPR0=OFLr{ndHzKhu|7$5}mn$)|aIS;rwCOnu^$ zZcZZqS=68YN$1OYa=sPnn{et+KOY^3lRnk?SEzr&sekfct6MMU(9fd&^rwzbs9zmV zREJZY&X+jVQ%|0Aa6U+%kPq^$INu7VIjQFy^$90GT^~N-%6GQTH=*^_ahQ5?KA3eJ zrjNeD`}XDBvvc?o&7&u$PsqQ*sh_TY#rYDaK6+yIPh6im4pYZr`clt2p1A7EIu6ZS zaXy&-)YYMSV%G6QeRLes6SI!5(46$q&!YbH(^sgEFZoKpYkiuZbJB+=>RX|{v%d7> za5YDrKQZgc`BTSX`chYi>W~gopEw<^`d2>v6J|djeL}voXb%61r;i`1Psj)PlG9<< z@d@?8^wD9~lcx`d{5VVgG+z5BZX(uAZ1a9P;CdQ+?9!+CR;q-xaN&eW~M# z`sm41S6`t%nEur93H4_^bsX|foPHL~OVmH8&4K1XbD%lU9C$r);KSSgbv#sjd|cl{!JFfHUKBpm*Tr?c?APUa zzYfj0Js-Lc-G|E#^c?gY^c?g)Xbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzE zGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J1cnjG+Z=r0SO_wT=7 z`}^(Nwx6#Fd;j(RyXOAu`RV!T`RV((=0J0xInW$v4m1av1NYB?m-T(@*>#`y_q(h4 z*M2{|>Yu((KC3tXnt#o|`{!TppWZ*ce|rBk2bu%Tf#yJSpgGVSXbv<7ngh*&=0J1c z#pJ-*&x2oD->pt(-cQyao*LT%Fy{>2VYhVBN zznGluK6jt{zP9gcn*+^(=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU z9B2+S2bu%Tf#yJSpgHhza$sNe-GBbxNB4h+?^d4H->>x-y&o@nJ+CW#zpvk1+D}0D zq5E*zfu4h&gPw!l2hD-zKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVS zXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzET$2O)?!Eg@zqbzGt?d6shwt}# z@4x*7bRW78mmTOi=sD;)=zY)}Xbv<7ngh*&=D>^0fql2%{ip9!;k%Xn-{`RQr+2`< zz^AK^9?{{o_2$LCgWZSj!(|704tfrH4tgIn2bu%Tf#yJSpgGVSXbv<7ngh*&=0J1c zVdlVoRqy`O=cDl5%KmS3*!t5uU|-v^z**H&VPCboD2AL_3r1Rxw`LQ_o4f6 z*@2#eo`ar)-UrQr=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S z2bu%Tf#yJSpgE8^u-_H${$5V`z61IDOXW_+`)hylektEy|L#Ng;j#lg2R#Qp2fYuP z1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0x zInW$v4m1av1I>ZvKy#ou&>Xl+4(z+tzrS$bzn{?it@qnq?zf(oo|m4NzE5ioGzXdk 
z&4K1XbD%l!>~df~P5plMzTeNj?EPyVHV>PJ&n^#pKlOg<{nY!ZInW$v4m1av1I>Zv zKy#ou&>UzEGzXdk&nE}=sr?-K{rWj{U;ljv-mkBJbM^V;YWKDKdf9=VgPwz)gWdZvKy#ou&>UzEGzXdk&4K1XbKvFY zfZq$6@X@xP(+NL()F*s=)F-?->Jv`aMW3#VK3x}mx-NQNmvtw!?u6Ez(7JG1hd!-C zpVpyUXTqBY{`%O53GKs#_F=+4ryjlYr~A--xa>gBLC-UzEGzXdk z&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou z&>UzEGzYH9f$8_Y>C^9h)2HA2ra#{H*JncOOlX}6tux^?mp;v<^G(?MulL_I_g~LX z&ri=!-^Vovngh*&=0J0xInW%qe-2FFr_p_1H{tYsB>nNWpU(+zj{1bIW5VgW=&obJ z>AL9Cx^(MIIIT-JXToVNz4^DFukJ(l;j#lg2R#Qp2fYuP1I>ZvKy#ou&>UzEGzXdk z&4K1XbD%l!Fmqt~{FCnU&Ix^m^x(}Bf=sD;)=sD

UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av z1I>Zv!22l&{N9@1NAvq_e($gK{eI~F?EBF3vG2p{c7M3fi~H;Qy6i*W&+M;f_o4f6 z*@2#eo`ar)-UrQr=0J0xInW$v4m1av1I>Y_bHL{x{oHfkmwxWK@577w-1B+gkIr{- ze_eAQy8qvNUvi(G7k}RS-}(FN+kNOhTy~)6py#0Hp!Y#@;OQKAUhi*Sx4Hd3c|SM( zK6&4V7xjIz`>5xy=kKxuFZ%xL^Kw6T-G}bOWe0i=dJg`ebKrSh+|NnwL+_v7KY!5u z)AP~u(eu&w8O?#_Ky#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%#mmKiZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJS zpgGVSXbv<7ngh*&=0J0xInW$v4m1av1FwD#`0qqO|Gkdn{`(b^p8sEw^d;}SJwNQj z`*r{N9O-l9Wj;sb63qO&R`2=l`F@$_yXUXx?;)H&ISQMzJ(myRTs9wj4tow?{m(Ia zANM})ecb!FInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJS zpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S z2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1X zbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>Z+? zIpF`J3iJP`iu?b|r;q-4+xJ%;>QAn3(mQY8&)0qbnoIlZ+vOI-&>UzE zGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>Zv zKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v z4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*& z=0J0xIdCoq{Qn;Pe;uL!zCP~1yU!2#aLAv?e|9dvImvUqtm{jk_4$)eef;Xs{He|d z`4dW{bm^))Y%k8T}0uD(KjiRM6a_~f=|3=1+A#$e+0CSAX35 z^qrtu?SuV>_77hnKjgRW^!o8bKK;r0)RXsp$2%`~|9`2!)qS|^K<|g1gPw!l2hD-z zKy#ou&>UzEGzYHFfydi^j(mUW`_SZk)Ay_D`XN30)aetZ4~N-Dhw8YmD|H+`KK3R1 z@I-U74>wmIKGo^`&^i-0mk;O1Aw5wYng{7n|Knrd=#b7w$IVS2oj*C<9Gnl*Ctudn zrw+~K!`0#AI7jpe`Lo{V-JVn3hwj5=2YL>A4tfrHA2bJ=1I>ZgKL@S zUMJ>#Oh?S?HLWjbOXN2PyyJ9s-JTfqIm z{lNX;&({w)UU3{G=6I$p$3>eVzBy#uB0fI=#kB&;0atVx5kdmiMm?Ge26UCuTmYv%F8I)4ay3OCNQe`OvPe zzP!Gs>7&Xezoz*x9c>JLKubQ#`uHjR#@oT$A=v`%2kr;%2i^}{2V4hS2V4hS2V4hS z2V4hS2V4hS2V4hS2V4hS2V4i%R0p`8ulEPY)ABm8UcX;c%==eo^?tzW(zAZFQR!LF zsO#ieKGV~#E`Q!nyQb+`Ueta1I@6J7Ike=NPTSR8kIIKUZ7d;rtMk`9)6+6N>p`op zGaY%Rqvds4&1=kjXn9`@&%yMxyw3FGnV#3l6KkvQ=XK`8`?Y1dHSN!yPW;i^!P_C( z0)G}iU_Xwsy5CbTn4Z^(wdHkvAJb``>3LmarjI4WkBFIGTV7{A#7sw?mOL%fN0k?K zpT0jTo#u&IZd5)@$Lqw}MqSr@4DG3T;wY=ji^5U&udZC3pH9zq(&;pgDxc|?FD-fA 
zN1mAFlBd3BbRrYBF!^!j=fW`4Yn`Dm-J^FF52nCYV|@1te982pBq`O@;b zzE7v;bz0ufbmTSGyuOd=X_;PIeIKu{DIb+T`KWwiNnMnEBA^>%^=d(?#Xa z>%326=ELj17xO-Cbvdjbt-j83X_=0g`SU(n@;V(cE%Re~ZJ8g_5!34QOh;aq6LlZc z>FeZanJ?36%j>JNyicbiA5;3nbgT!{k!N~dCr_-?^Ez$Rb@I%I>1kOmG1HSL*7uX= zecJMR)P20p`-=~>?_)YzUe{J%kII+#F}*H_*J+uK zmb|vQJfbSTJlV%`8B;Bbsz8Nb#3)^;@@kTZ%nlxpNp3H zGCeUZd0O5_OMZ3fnT|HqX3bwdHkfSq{@jm8W@) zbw0!_k2uOQzYNu$QR!F5v)m{fRStPtma{s{a&&s$N2{+B^ZuB6e)3W2cwJkTN6Y)8 ztiC@AGe26LUrg0sUnge%OixU!)3LlLtMB9e%%7O|6Kl(Sm`-Dzo|x&PEbGC1X_-DM zADvF8=Y6ZQ%!lbTCLfi5OubiqKQV1oeo^`JK3d)%h5sz;&R&m<)IVwc)#M&~S*lPTa_c6V;tcT`_ zc|X(9>g!CWtxm`Lqtfv@?<3afHLuf0U5~A%z4U#=%%65m>!Z{0er=hKe9Zf4M)Duq zMO(I06tAzdJ;~GZe!gd7UXS8+I$~{^PYmyqnEBH3K3>q)GyvpilW)|S_4^>vMzj^*p?%!j;A&+FugX*I7g^L3pQo$_|@ zc1X5>`+@s``+@fZ*8$f7*MYyk4lrJ(Wt`0Tn3mUxnNHhSLi)|>((^j+C#GdSERUAy ziJ6|5R`XH!Me)ps*Vi;Z-pBN`nkS}ZIoj&$#LSoJHBY=ctM8BD{n7G%rlZy6lc(i< zv`kOTbhMh+IEH$WXMVh2Tc#sVOdC^vy1I0{9#w8sem;(h9(X%=J0x4c{lNXe{ov2n z4>-Qj>T!~omg5~U$33E&#bvlis(vgqh`EvX)%a)_CZ zd`z`3uha5ATJqX5zbKy9S7%v{t`~V)oe#^^Jk!(idJNC4?_)YzrYF|viJ2eM)ABxA z^1P3DP5ZN_%Siq6cJOvcw!ojo4|M-#zh*z@^(f2q`Z{@Sqta=f<;N1@FT_lz^JTgy zUSHSwFn?NJ=Y7OFf8y0$XFA%b^ems2>4|B{udaSf&+=%=(=tD8d7bz1Ix(-)lGo|T zGaavM9K-u&`Y6kEQTdS9_mgM2yw3cHX`|BX>rAJ4jae?!6EmH*ysjU1&H&!f_5p82r8ysmB3_0`?a>rwTKDu?Cqew~gu>Nci{Wt}Z>VukJpbj`bt2E%PJ)d$G=+_iM}iV`>~B&wR9HdgjOL#Jrz2>blNH zr{{g-qtgH0a+#i%<r79cnCXdmojh$6&--YZ zo|gA%%kl2k;r_=ZI`I(NG>1cVKmg$ITb$ViLnV$LR>rr@3)9Z9BFQ(#vsB)N|*Q4@dI(?nx z@H+E}!uoz<=A-ZDby{AhWjWgNdQ`ql&wPn_A1!%Wrl%!O8^!DVqHs*TH=Q5v)0XKp z&-{2@Tb-WIp)J$t{4~$|$ZNZ%T5; z>Gb_fN1m3vzK^`NtPicuhv_x1)3ZFL=XGs)U0a<$uk(K5)mf&G%8&Jox=vnOmiuQ} zclLT@r2ct3csnFp!2Q7e!2Q7cf$Mj2m1)|KV@+`6({pIcX!>vQYM_Dx8CSXb8d$n~hU z9=SibKe#`5oa#E@I^a6sI^a6sI`C)Ef!+yu59`XVtNAg@JrmEruI##+AG6%Pc>Z-| zd%ORoI-vVssu?o=*M8{Zu(Z3kgSSJn1>6tZ58MyDAGi*<4!91u4!91u4!91u4!91u z4!91u4!91u4!91u4!91u4!91u4!91u4*WMd;QRmLKfeDj?cn?Wt|$MEp7?m={^S1R zajENo>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk 
z>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>%d=K2NDVC z$7^bT?{}JgkpG(6-~0XJmB@chZQ>vHynl87^!iKx^mg!eNVb6cf%}2`!Mf`QtBZeD z7w4=lzFA!yv#!f`{c-(q{aJVV(y&b$Ak}cqV;C|qK;QhdLAYBJMPLuYCiksbE(*4ET!`ma-0`3Ry z2kr;n4_pUa2V4hgbim_j=?_uybnk>*r|knwdnPdLmB6&FH`0FI4&DyQ7H~gsKX5vMU%Tz6e}U3XmvTnAhSTnAhSTnAhSTnAhS zTnAhSTnAhSTnAhSTnAhSTnAhSTnAhSTnAhSTnAhSTnAhSTnAhSTnAhSTnAhSTnAhS zTnAhSTnAhS*1irT65?0-{INc7+!yIQy|mX~rF~pC<$ZZOcsnFp!2Q7e!2Q7cf$PA3 zrUQ%<*Ou**kb0~w%eZW9*(*`MwPn{~Vbt1OFQx z(D50uj?aj7d`7I}Gh%;_|C{&d?d+-POFpVx@=@iIXE{-p8RW5m!6J=RW zlw~ zydAt9k}cqV;C|qK;QhdLz;(cNz;(cNz;(cNz;(cNz;)oCqyt=kin3gPin3gPin3gP zin3AVl8-8vd{nvQqsk@Ea-uBDiLxvw%Cej&8&xj(sB+0il}kRVT=Fa@%Cej&%W|SD z%ZakCx30JUB)xTicYk+(_j!ivfa`$kfa`$kz+XcLqUJ~Bqvl8Cqvl8Cqvl8CSx%H? zIZ>A7L|K*-WuwX^A5|{-sB+0il}n!GL|K*-Wm!&?WjRqcs$B9><&uvomwZ&Y?NweL$lYTuW9)V?oymJ?-JPLyRiQI_RI z*{E{KN0m!Hs$B9><&tMPQI_RIS(X!JSx%IVDwljzx#XkDB_CBTc^@ZyobYkt&p%Fh zKlOg<{nY!Z>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk z>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk z>wxQk>wxQk>wxQk>wxP3bwK|ff&P7gzx#EzgIm`{*G1Pw*8$f7*8$f7*8$f7*8$f7 z*8$f7*8$f7*8$f7*8$f7*8$f7*8$f7*8$f7*8$f7*8$f7*8$f7*8$go|4Rq-`I+B~ z-LKrQ{x83Bzi_{Bzwo%jb-;DNb-;DNb-;DNb-;DNb-;DNb-;DNb-;DNb-;DNb-;DN zb-;DNb-;DNb-;DNb-;DNb-;DNb>PpX1HboNkWTOK^Uw7@{eAfR@Nw41S=RyA0oMW7 z0oMW70oMW70oMW70oMW70oMW70oMW70oMW70oMW70oMW70oMW70oMW70oMW70oMW7 z0oMW70oMW70oMW70oMW70oMW70oMW70oMW70oMW70oMW7fpw(=`unqtg!D)L{djF- zxE^IaKX1Qe3(#+({Dl0v@_R2={PdsR4pDlww)lT~`}*^HKTEcN`@uik57w5RM&;xF z_0RS%fB*jelP%zW;C|qK;QhdLz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cN zz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cN zz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cN zz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;(cN zz;(cNz;(cNz;(cNz;(cNz;(cNz;(cNz;%E+5cPLZ`FGc~<=wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk z>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk 
z>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxRPpG^l2yJO&z z_J_@izjpSCch0%AGX7oL+^g>0zB0b$5g+VYm^Ck+M7NweFTQ!*ms%Wh?2LH%D?3lV zxO__dzG`;H@;UKs>WzEy%x&h!H%w(THJ-}ujw$hE>K;GLix2;0-^!=|H6QuRh;NoE zpfY|#qY1-)*>_(2okc6hT=>)6c(U~PmeWygWqjjQf_d?(>Z|K-K0jW(=^KyCdvb>18v*b4dER@kzg4e)31>E(qms*Js*@^(IY=Cv*FJ_j&QV zo8%l&yy@Kd;Kz5YuFBMS>M?s%;<@I=vr-kA7xLfp=#qoFy)`x7JoO-R;=5G0#w$B7 zh`;sa`5!;pY(~6Csv^_lty2v;JN{I%x=$_)?I-OzBVLhgp1tP8A9^tPZ(6)*vKntp zi+>hN-Z(A3Uv;Z}8k-+~;PkR<`hPt+KH%$Q1*f6iDnq-mJ*3^I$CGbu;~S>OKRP%0 zWOMPHmGRVTsy8FvIN9u#)8f^Yt$u>J@!rX1yJl)U)%-_Hk0+n*%k!t>z0D5!@wt+@ zH<~a#E_QzWx>Q{fMl@v~A6GcCSP z^t4N{Ie z75+3gzHn5p>Ni%?zT$V4@ulsP4m&&eQ>q#IPYL*@o=HcU73w8^P#I4(-_w=w<5}?) z)lb{&=&AAKTY7xQ>GAp1Ue)EMY4F?Gami;!@b7A8u6D8M@y@A#HT6$5TQ9!1>B#Tr z&>rH)mGM+D?^MRGsP^i+o6L^y(Ir{J)X)woID+qIZhTp?`+Yh!_z&wT{cs-qYYF<( zDzsx2{I4qhMfKD4?6V}EdR^~Tq5oIKhYm^pTY>aTep!WnzA~P2w~rUarTwe$URJ~}N;T99=*rT#_(K)m&nnbwWqjm%$(C7)e!ns< z{j&<~v>g7jB%YG9jaP+y#V=PN-^F;ZtKvyjdwxMiLf+%Aad|IG* zS&8?*6n>hKkn~mX|CM-uOVOSg38`;Y{JEr?^jv{{xhT||^%|XY+iok6|3dV)D(J*2 z^tAVtG8NcE4V}6uPns{f6znJpN5mUh6Im?ISw268ZiX{6qR_Mq*I) zXmQ8DmC(oEV3*=OXC{*B@$sX-#dk@z^@01##mOu}yLi>q+ z{uZB|l)qU^;$J1@f3sD1Kg+={kH3(VhmytcqpEnDRK-`sr5%@}eoIh~RpGkyuirxZ z(Era!bzu7a3h3z4P(Sg@jKsf_!&{e{_LXs=3i`AX{<<7GwgCP!8+tJtIW14dbim z%G`J=4ETHr^kNy}g1Pb3aQWKY(B7h33&Qvzal-U4K1)BJg7+{D?|*t2=jA=k#Cxn6 z-x+7f_%|iwFFG~@IypV$C%QN_o*M3NWBbhr;{wMg#y^sNW~c|pb&01cp?eEZuW4a? 
z7JuZpT{E6@T<5qY@!Smbs|6vhIww_uX{i5{cq&Z#b$S@rInKy2l+6~>b)==-WLuCg4d?<(kbRnQ&jR~gXf z6`@@@u1g%c68)|!=n2O&>0ecd53BG#R-!#tA%3h1_j8;)KJ{+}`h8WL?Ihz8+F?aW zBK11zlqO{Uu>j-u0_0CU;5=Yq&`aqT#R-Yut1uogj+lu!d@A^#@qT9pok+$1FP9`l zpGp#<*BJ?!7i1(P&Rm6gKt@91y?P0WzZxf`Kdpj}uEMytGK^yqH&kK%u}a>@Z5t<2 zI=#GOA~ioaX2XQc3pP(k`a+2#hL$A6KME4kA4?Jvzs<#XNqw6W^jF5gIUzn2eVG&D zanY}O3F!y<3F)6X$Ug^hPe~$`&oeC(sp0&gl0<6$aZbyGjOz;!*K<5s5T2Luzs#?w z50!`)XF^xzqQ0CT%!c00LtI#pkny=7!MIW`R3s#xt-^e(A|d`eJ@g-$FHM84uS7YV zU(5*e5a|y;#h*w{`^LjRnz&Tv$ql64?`e>bd0vA=>b=}mo{({;5#r>=3F((D5~8;)6B7TI zB&7Z=rM&k`;HNDy?yp3=l_~WaLfy)Q-&O^kl=@U5&dW&1ysi+{E$OQdxF ziSh*FbeX?YBp5e|UsWVzJs=bBFFV1wMcOS3{#_>_{kBdbHI2#1OUV4RUP9It>Lp}+ zErh=pCuICALi%F#!@`8DOBADh3h{o5&~AnB>muY|1b?cZkojCuLgL@zgv?8e5Z@O< zCyNpiZ`4mnJW+&pVLpX;&LWI&MQD$r;3wk8MG1)~>%%XKrCpyal6GCM7`j)Skoi(! zLguN3AzwbftWOl8UWM?#qJ+c`g$Wri*-k~F-lAJY$hSD;EB;@Uka43J@4ql1^OK^4 ztPd5#zluWrM6U|*T)e+1A@eV`YZ2x{#n87Bq~_cEx0m-k zsHN!3cCA7_za^Kc?yit_y1E74Z$%=dr~6ch4jx=(i*$RHp3hmhv?a@N??!y+)p9&eL4z0j@UQhh&?TUoNF)bzC1}&w%#;+&w zT=$mZm)~=|sE~2vr?%3LFSkrcT-Gun>w^^-hhp&G9BH=&nNt3wI_NK%cy8)gE_5&( z{W&`!aY{D)Cp#hWVx5GnbLFDEI?#bE_-7Wb=SVv@tAl=?laTeYtc1kBnHV2)60)9< zBmVLWbuI_GkpaKWl5uqX%!I7(Wnp~C4E2_{qdvy5f}m>>2NXje$`F^7;r>#ze;Mjk ziuNm$IP|DeyvH(W&+AJQ;$NkB-ZH$;GWcHuq$`pB@pOIEzkcH3_T z8+jjRZzlclxBaA_J#~TT#XXxOQeo@OSIKkkv~kK0MsIev_{YSqQvcrF?QB>t?g3ndE>+P%KKct zv()$Ko-%&6>MZZ^goL!)20IELk4t?z-6YRF?HXyvNf)Qa)%&L2n~>$yO{8BQyu0+P zulAFE^Wx3oFZ*sKdewP<8DEEYOr+Me2b?MSOnge>k!^d6f9%y$*0*ksOMNG8EB-a$ zYVp&dXUq8h;y&`;H`+qxjdLHAcHRD(L@Lf3(nIvWQI~mu` zxkhw9vzz$E7hS|J9@;kL=Xag>xb)BZSIK=nx080R(@CCpK_{u_52wm_kFY^=j)BcU*5k(+Hcy`QqO_Cq`i0BJ~i$RzUXcl*9tmGx_kDN_x9Bt(r%yJBynlm z-qOEze+>S7sQCB0w@Q2ddWZD)Gj5i6;`-h)9+w{|{pFIIQuRDx=wqpP>zKkPB;C;` zCQ@-|#yt{uEWSzJ<3?Q5AaCgz$zBfxe`P23(zj*uD z`^9heJx=Ozes9TV#G}$*?l=Z<$o{GOnjU+n#Jz)ek^b?-e&Q$p>LugJ%eRZ|aEI`9 z?v(hU;{H^>tJtJ($}chp+$H@y?}${n_w|25^mwP;MK_imoa*Ph-P%|DV3#Z8{`Fpv z_qXD3$@j2*WE@!8N8;%%_K|iv?f#T5J>0*q=);xANPqosdujhipOW$atXsw3a`%w< 
z{^Y&Ij~~BDbnTbhWc=RgPN{F{z4D&-+)461|7Pi5*GqkG?)s#ZyXXDVpQ`mn;@=%b z2YT%)f2ZR_kr7_ ze}8$q_|4NN%Dke^t>S-u?vZ}A6qop5v8Yq^DK-@{I>w> z_zSQe^c&hE1M%TiF4 z>o3Iq*y50m`)jh!J0I(3Wtjig3F}KTzg~%bh9y|1UyOCo#n?wGKzy2w_;O{keyMhf zveIrDqC2~1hPa#Ys`v%>y)u!mNc!&)*@&03MQ{PE%nXDysn;%S8wHr&fK3V^ZjmZ5N9?LJ(}1E^R|YF zue0E1Il|A%#QST6{kbyCqv|KB3;+MNUn(A*(M-m#=Qc!~)>h)bDV?$Y&|3QQkEMuj zI!QYYZj1RsJ<+v)HxfO*wkhhr3D$dB%ljPJF(LaZtz@3LOR4DJ$(=Cnw~=_@hC)1V z1I%~ZNxRS9M9MF0B|5b2CNh3BilH9G(yniH#{93X^v@GYE{^z#_% zwGq}0S|{5rBjd^jN&Ek8ztnnBzY=M$tfn%LD{ce--3&U_2J`0D^8N;P7F{^5y|iP& zdJ^Zoy9xZQO#JWN_UK1#W&PszjwP=w~+XLR8v{c z>AE?dllOO&^?|e6N}QcoPjvtJZfKtli1Rj*@{ibF>esKi^wY1}V*c1p#(}zB5!bJe zd^cEa`^o;tNUUpr3w`+>`(`8K;-8bSpE?;jKLY!pldwC+lFtpu59mygF<+_R&Y+e8GsgoLd=;_8A=Rmvz9QXqRE3KC&J) zG}Nn4YGN`B`;cGb974@=$v*!OwEIx3j}OCo@KEfd48!;_BD9;VM-IVy_7JS&4hikU za%G=r2-dBKVV`|Ocpl~>=a7bk`pNqIP@Ll!7V?*M#o^dr8WEmL_6-MP+#C|}XS*{$ zmc#nVK8k%V*>@Nk{6f~rhlF-zzOt@AEW9t_zd^l5VLy5#{A@JNZ;e4aj)GsZUyQ+i z^BDNgDC~cZg#Uem{jM?a`_VWTHVXTzV{xvnCSO?>8Hs*0I@C+{jYft3#Cps4JsSH^ zHRa3rIv(flYVwhNg;9tzCSjj(6waNDL_cKzuDMT_FMj(?crMw`8xwdudp9mcsk9thS z{{2|&2aZF0IvV;sGU%uDr?EjVWIc04&|ld{860#-_dn_%^+5J|_o(x_75%Z0oajxSB_{n7KJAQ}pWfF8`BF+u{ zfP5!lzjzYnZ4>a`$KzbUIQZv8oOhc9y_*1i9*_O#aVVeuJqhn&0_r^;=g!8Xy~ktT zFdjNIF{GFMsR@W1CIsCP|Nb$aT8C}j1@pl}B+ks+O4fhg>lxOYQtRTo?A%{_KS9D16>t<~{e>iy2yN#+T^ZX$7XpPePn9I%Ben=}TWY`hTGZq5ePllJ#=#Qz z-M@{j+jQ+5(x>LLUu-1#7j1=k&1SM*Q?U!;-p(>VZqp+n$4Gl4Zr@MVyZdd9d38e8 zO-6OZ{^0Hsr#`lo%)hSQT-twfce$^ky_9=e2i&(Q){Qnzq;&e>?$WMbZYJ?--c>Tc z@7-S3Tkh^4?`_()@*eI&eEfPR@ssg2&zYK+x9KD0AGH_eKOLm~X6_^H{^%j{9-d7| zJp6ihc@IOklJa_8CF|sidxZ8&>CA|Z^1e^+B=gd(I!pXLw=LSKJLX4Sr5%syEbBl| zc9Zp*rmeA#-A&3Xs)@r>@y%B~WnNy`P1ZY)X)pD-rz6(+I-#Gm6+b;HArFHQs9}t z#6Qnr{Cd@t&vIlOcr}zS=g?jZ^^^6Am&5bPxryhYd(R=xeJNO8mvidR$9X^VrDgqO zJbWdT&*$NF-p_X6^XdG_^FE1_p9}9x*NfNr{4)N&h;xU}2g~cUEJw}_JR6>mytcfL z^`&KfGx^ZeIC`MjV0 z!0WQFUeo_{{@Slu9@~%Qv3!;z=Y8G`dcu6@7ZNADh5kPr?_o@spGmt6!@0iEA>NmB zg2NCeO~Jf-EY4TX#Jpk|&Zo~p-1aNt?RhxIv=Hq&7xS@Mm~a0Kf14N1|H-+dS%|xS 
z3UQ;XtMNOHpTqY;a&BNIo@)l;=jk~2JRSNu1M^3oZ<~&KPY>s7<($-X#CbDO{~6)C z9Dbj}baMWQ=h|jsonU6Dmz+=eDU>Vce}4|=rDcBi8`dAPuy0=`$bQ5b?XbSMID8)<{+cV}_%-ct4q+kA#s3EX%T6EHPhN<2 zUk*RZ!FjE=Sl3>NIDI+Vw~m~nIlQgJd+m9ie+9;uy0Tt)a65^c$`+yC%Mr)dk$t2W znA9`3FUfrpo)gZKc<9B}IIlU;>|d;cALq$>>jiBk9_?I-dC*U2 z|5Z5moGEclJ$`36D|}BR`kIOTs7lOVW@EpK=QuNQuBsCA=UJE^t;D=BOX9f7$ynd` z8U9-Zf67FBKLzVLJP*wC*qK-_=J$)UFrQut{mH_E;rlt!oebogCHwTRO~HEMY|NWi;XP%C=a=e z>|^A>|EHk;&4d4EfUhI#@<2*e%?bf%rPLt~6y|iPO*CLFN^E z?ke-NJ9^7J>yC5eyvMKm;XFZ2`OGJEF1Gu2GOv4NU(t;xddhy<&xhjt;ui8;U3yCX zGxn2t&9JRy9x$hCs9$Oy<+42_pJsc@b5x)6V0o!|^id~^f0Q3CI$7)aWPP%m==^&} zg!@vF!R$kmF~O~L%P&@$!$D`u!}s`Aw6W?_GC|)Z?4=zbb0SH!+oiF&9?i= zeE;!2fltLJpLM}`sd(U1`v9LD7H}#adFdp1f4A%#_|!S}tOI1;_R{w9{L8nM_Izg_ zna@7IwdnlhYXtAQgXr13lVm=3{B|;59eN4Y8Mc=7g_Dku`P?b{%JqYLi#_ic=~ta= z+AlSKJb0^6uhjX_y?aZ)>(f)t8Cf=!0`RW6l=;>#_y*UB^QYE74CU zVx5h;{|$6|9L9lfpu^*#yWb;@`ynp#)hUSIra))%5GUke|344sj_YFl9fEzTq3HiZ z!oG^caig&gI3}zQOF3iX@;%DW_+E(Lz0X1%FdOmpSE$d|Vc$!>Q~C+p!Sx@K}^3l&F{{4P<4>FJbFkn6>-z%>(f88#;&gUVf&dBe2 zd=mD5x!)@Ftl5W_{p6YO+n<9D%J&>!hW#?B|H!c4$9;5JKj@EnV~rov|D@j!3jIgU z)h~daEyDgwzo3hae2pc5eK)zIXRrmpU?^Aw8Z@}!5b#z z{LLoq!uTuWP({EQ7{^m_TJ^cT)OROmZ7T7}?alB#L%GByCv^|^rPlXO>n8Qj#{94y z&i~fyDf~5?g!C+jR=!W!P~z`1cE-8&wz98s#>O%qf1)$i_gi5dzM1S_bZLwIn3gjC z9Mm(EliEN1Fec{@r*A6x*KaTHyPn#WQZq3?>@r1*Z}+<%*e10 zN2_p@UpPK-oR#m*MqoZa7IDyr zm`77zYwqWB%X-iNtj~@L`@?b`WFXd82ZnPX@}1^HtP{N##&=mC9getsY#3Lm;}Wlp z2kkO$P#6bteCGHiasEimXU2u{`Tpd*Gtci#!2IJQ#Lok<@6U5C6XD1G z!gcv>d`y@x$U5%(p`Ow%BM_I>ynjBQ)ORG}O78oA7@m*kUgW#a6naz5t^_}2ie`+gbX9ljs=KD~cPC-Ko}%#X$dU6FOSanPGg`JUqQOpIe0SclBO z{I@^aE^C9*RIk(d<#BFj;rytts z)1c4P0mirVYndl}iT!}DF>n47>jqzAoqSN3XVYI~e)(BwU-}2mT~1 zeCPXKz#OkwA6m`}_#QZaV0}1WwGk3`|I}L^RhhFgYQrCj4x!L{axtJ+Zeyz30T$<-p0E3 zyI6mE7dlcikLEtIjOVXIM_v!>HS)VfZ=k={SnkiV9PR^)f4_$Dp(Z~W$KHTmzY*4x zxQ{FS@9l6df%$R2m-&h=zZvSoe7PSjUy)DeBZpEKMC=ctc$%Hu5;XC|6%`Pe$-9&BVMOoNL(;5^lSD@_Alxh z`ziZ7`!nN5=1*PVb;cL$f4olJZ_Og?`QUllNwIo%yqUSuWd`&&T}PZoDr3@o`v}BhTmO`{De6*I5qB=lII@VLIl| 
z_>s@e`^8T`!S6tR8pbL57oT7H-N3LO&Gu*c@;eZp2LI%9vVFM@MV{sJy)r&yxqN== zHlLs6^7%L}(SMmg)A9Lr`?GxMfBi#zL|&IKx>(ac*p4ip@ec22J=kv~p8q1WFYjl+ z<@59Xv3!nCg83cKd*MB>eWlzl!@4Ky!FJ<0ZI(+b=Qsw!??)icn-cadWL&Joe$On- zC#N7zpNw_tsbRlgeh*^`;^fKrE@vv%)#qXSti<>_1-}C}4gGCl$XE2eCVzQ;{+{gQ zkWTvLRQT^~#IxMLpN#n*&-?KE%qihH#lL4@A7&Bex0A#3%lKG{{i-=3|D93`CX>T? ze>n#=KkW0!`sn1aFC+fC5dC~gcz*Hs8JIsWmUG&VbKWu;?J^bo9K-=N&oA?VX;_a~ zfc>{AA)Uk(Gq9e(-(jmMU;6J%?C&kc`s-xu$4$mQ$2827=ZACh;-`~Q&NS@T)jU7< z1;xK-VxMU~)*Wj6L%tK?@8m8-J->jz)vP1P?~!HW_hi08dyNR|M542G@Ez#j@I96I z`2?&p=VBjnNci4C)(a-!yMVelzy5VtN8&n>jPrRo&pizOUQ>Rn)I>2K=a@#I-A0Ce zD)}9zdN_wQ0_`>u{y7oPktgek+Yb)qi$0IT{4-b1o4+><<21{efcI5XzWBvR`0E7N zd|Ch9oc(+h{D$8(fT?{SAg|M)%Ncbd`QO0qZU`J|lirfOCm? z*mtRieTIgz-*IC@oSVzVIjuaLv+RKWir?Ft(oD`v?Oi4PXTN6h{lYdG@_P=4c9-*` zLwAz>gt3jFm-*O#;O}KMz&=YI`QCfa#^U!2@jH`?_&t9PewU5EhggXBUnb=(Z!P;d z#Rd2ts2TLH6#GJTu}@J)_Cao-|K*0?FOc}CNS^vY*kb zv9#x14dfitBU!Q!)1t1Nqv=?NcFD#5MmEl6X5qX42KbJ=7{7PW9P56+%6i!7M&b7q zHN1L}l1_D!{&5A6Oh`3I|bsMg?Fg@2*qN-&OauH`GDxlDEM=K)Q9upn(ubSANhU!7_9H*WBe?D4z$4U zZ&pbhH?|z{UlxA1hPqG}<79K_VtxG1PC3q3lw+L9!TJ0`Id8Ct=OL>k-k1}U@o!KA z#8LI-{7JVWoFC!uw6?(au8n2iX(#?3WC3(L7vGOI#`xC~;~~ETDaLtn{*G%7>do;r z2XRA*oZoo7Tz;Qsd@0T^G{E;3IVi6o#=k<$&&s6TD%OMF6~ZsGQJ(_nF3*M5!+4*K z?^`eqzg8FVP8NQTGFP6Xdx^|5Dsir*K|{1pKAyV{zC$R-@1y1tlT4@0{o1+a`L@4cetca~+ou?q8v z`r=P-mr8u`S#ud@UTuqd)I;1+f^n-3#=}zlzEXX(Yca;JUlFHbp7(kK_+fh^5>CfmE7_V~>pVXE2(y1}tUmg5TS~23#QhW!^ zcxN-jC1p6D)DrO!vbf7ja-I{CpYoGY9R;zc<3)lP#8fkK^wo=SjT# zP$T%sD#Z0%2PlT$VcuJ(Dc)}m(ih|X7oc8Qs8=p*CgSiSeBZ>lvq6ZvQsZaWhUouA z_o&dm#cDC=^Wi0Alwx5e`QE~S2zBhGD!ep7;Zc>#X6 zD-ZRn4;`x);!hbDi=}=iw3PSqZ6i6ac0o(bgGw=v_*Ldpw>HN6FT?M_<>2?6N)X4D zq5qWOzB<+KJ=lnwA+5IF@6-oKQl!iIyA;Sq9ORL!#E{z zTfVg8F!rlb)T2K9pd9J*FrL-??zF_gC737Td%IK0;K#WbU)sVy%Hi+XL9b<= z#>refZ!zY1Y@ddxPZspGJM=zJ-t#?e!RO1k*suZoAs_STc9^g6?`mXVepo2|q|q*j zWBB{+h3K!0Gjie2`LH?Y@A;Vj6<~aiVZO|HcP7>yFwY*^0Q1>A_?ake?LDD z?T~}}>tbA}58dbQR@cQjn__uS%bFlv9r#Nz#;-E;pIppC`Fr+t5D(Ocp4W#C)D7cB 
zDqhcnUR+jzIF7%UoeRBUoWtMk-x_hklAuSjA6OT<3_ri60nWje;ycU&=neml%}R-r zmo|ak=R@~OrQKib3cV=6@3uFP_^dVOsf<_nyWXYn=lYlj6hUX%?^&;Wj8i38ALBeZ z8})1o9mo-Xd%K6!>lgn0hiuH}o1%TP;h)9O#HU@$MGx-b@9Agb{WrxpP=N1=8$*XUUggVkZW@Cga6K&(=^G$!&5?DS z`@SbGc;`(P;Z^j+q|MPZ#u_5uxytX(pfw-s`^MqW?gX)F(lbl=m1?xq1v94T(a(5toR(=q(J8>KRSH*Jb}9e>}pDf|e2etA8_ z7x>+)r^M1EaS%Y4WTm)@je+(HJ0_0EgE3nl@Gmbjku>s_93U1OMQ3U3hTZN z;WwG`T+>SsH)Nq-Ho?5T3C1VRzZ*hd>S5ki7yeNn?TPjId&*=T;>{+K?klX{RR8X; z?Bnozl?=ood03Zd67*K~i}Nv0$(MFKFebW?S&n{F1i#3He-%UT%Q6422>nmi-5Owh zp$hwSdFZG0a9;-EW2~dRSt0%QRQ`^1CiWA!55d2SRzJj{a(*QT@jv3sF;)0JvjqLE z1aWl%`W63v7U%mJh)*)Zx`UjvtBd|r2mV6kn7VsDT{aLQ_)kV2=Fb?4F!Yprt z^~XktAM2xEoYWZej{?k7>f(9ohII?sM{5+u1KBTb zi1$;D@f3g0>V~$6r%G@?_5rVMgmh(zbB{i;-iZafXI!^`=FvCobJpJNchB5Cv)8lr zuRn2*oA&E^Sw=>zS{{@CVyXYT^>z?P@OJoTw8LYC@|+A1V{QSRdN59|OG1&s{2W!U<{%N}8&ntf8?eN!Xhjp!Y z>)Kz|_4EJL%U{>;@2_6|U;X{B>-zuI-~YOA7$6|IPT@k3-)&Kc1X5>`@w(a2Qj(L<0Nl~R13I2h$DDA z{5{&i=il-ky&b$Ak}dFO^8=6D#4o)a)=fM7+4_UmNBqFs!P_C(0{?VBh>08gY3An% z)W@wfgl>nJ(EsuK@;E>m(A#0%w!ga54`B&4`Vee&zlK;91C4#^g9Kls1>ASU#GyiVZvO9Og4{CDk;klVeVdOM_A z!2LlS!P~*xA=v`%2Y+)v@b9xyFZd5%@Ar20cCKy#_cQ1z`oo`X{Vpce^LeSaL#hSb zAH)&79lRZqEwHxyAd$?h`d=dT|KD%_Y~zO4M_%NbwnGf>-K{i)w}ZDsvIX1^+z;Fj zydSs@xDL1uxDL1uxDL1uxDL1uxDL1uxDL1uxDNakbwK@L^?j0mSiLCqht-R3I{d7C z+wY#?|9J~|3wR4ex4=PltCg=)hmNU#XYH2`_fLlhq{9Q#;X&!};BF~^Scvd<*I~|^r4$n=8=cU8*)8Pf_@WOO>Q98Uh9bS?SFHMJ+rNhh9;T7rd%5->D zI=ng^UXu>5O^4T|!|T)G4e9X4ba+!byg41-k`8Z8hqtA}+tcA4>9B7)yfYo%l@9Md zC^y-EH}2SB%ahK!r#85^Hn^`gxW6`dpf-50Hh8Eu_*ZT4aBc8NZSe2f;L+OPvD)DA z+Te-W;K|zHsoLP_+TfYm;Mv;Xx!U0Q+TexS;KkbDrP|=-+TfMi;MLmTwc6nI+Te}a z;LY0Lt=i!2+Tfkq;N9Bbz1raY+TerQ;KSPBquQWfZSZkz@JVgZzcv_98w{)s2Gs_i z)&`%|2A|gkU(^O))&^hI24B|(gKL8!wZYKZU|4N1yfzq78;qG0=tI4d2_PKR^S;oNjMFCETLhYQl-!gRPO9WG9XOVZ)ebofg;T$T=h zO^3gw!{zC4MLJxW4y)4Psx)|ZMrKAvI+JObEW>0QCdV*!43le^Jj2vAOuk|28K%H6 zg@!3IOtE3=8>YlCrG_apOasG|8>XRQ8X2asVVW4GsbQKKrnzBS7^cE7Ee+GkFfqfl zHq3g4X=9kShG}P*^$oLuVcHvJL&I!jn2imyiD5Q1Ob5ekW|)qK+1xOl4Aa>#TNq|b 
z!)#@kE{5r9m~MvYZkQg1+1fDM7-n0;#0`@$Oi#mXXPE5`vx8xFG|Wzh+1W6=B$=9M z`|Mp6-c8}%72ZSPUJCE2@Lme{R(NlP_fdFXh4)iHn!=|me1^hjDtwm0 zXDfV;!sjY{p2FuVe1XCjDtwW`7b|>;!j~$1nZlPVe1*bSDtwi~S1Wvt!q+N%ox;~E ze1pO_Dtwc|H!FOL!nZ1Xo5Hs%e22n)6~0s9yA-}#;d>OmSK<2Ur_i(gOFm;kltsb4LaGt_-70y?gz$g~BZrZl!Qc;noVTr*IpE+bY~n;q?{X zK;iZZZ>aD_3U93NCJJww#A%A!!7!T{rlVmtH%uqPbT-TuhS}0ETN$Q{VY(Won_;>e zriWp+Hq17L+14;|!z2vT(=gi^W_!czV3-{Zvy)+VHq0)D+0`(+8D@9G>|vN*hS}3F zdl{y;VfHr6K8D%XF#8#1|0GjOPY+P|K!p!d_+W((QTR}W4^#MXg^y79NQL_-e3ZgR zD}0Q?$0~fB!pAFog2E>%e3HT^D}0K=rz(7!!lx^IhQenme3rszD}0W^=PG=j!sjb| zfx;Ime38NzD}0Hq{LSVmZZXWQhPll!w;SdT!}K-Gorbx~Fn1f~9>d&gnEMQKzhNFQ%!7t` z$T0sh%)^Fx#4!Ih%%g^R%rK7|<_W_*X_%)B^R!`}G0d}udCoA;8|DSWyl9x04D+&K zUNOw8hI!2}uN&qK!@Oyjw+!>PVcs#!yM}qsFz*}Y1H*i1n2!w8&oCbw<`cv8H_QOT z3^dFj!+dI(&kXaqVZJcTmxlSuFkc&HuwjN6W~gC?8D_X)Mi^$KVMZBdv|+{=<{QI| zHOx4}j5o{#!%Q^HB*T1bnC}epyXBt;WC9AC|s^^LxmeD+*siz3O7}_nZnH#ZlQ36!YvhUrEpB))(Wqua2ti& zD%?)t^%dSg;r0q|sPIM#Z>;bp3U8`#2Zc9NxTC_GE8I!p&I)g#@Rka1rEnL8yDHpG z;qD6eP$2P%A!!UrpSh{A^|e3-(AD}02)M=IP$;iD8jTH#|9 zK33u56h6KhudQ+U@0w3~Ja$5|1=H4cPBhF(hB?_Vrx@l`!<=TA(+zWmVa_znS%x{g znps=#t9JZ#PO_w0@z}WvpQrHo3SXe`g$iG!@Wl#WqVS~(U#9To3SXh{l?q>_@YM=m zqwuu~U#IZ(3g4jcjSAnS@XZR}qVTN>-=^^G3g4k{Uxn{f_%4O-R`?!;?^XCdh3{AR z0fiq__#uV=rSQWFKceuz6@FCV#}s~C;U^S+QsJi*ep=yY6n<9W=M;Wk;TIHsQQ?;q zep%sH6n<6V*A#wT;Wre1Q{lH1ep}&p6ng{LVzUEvuD&s6v)g@0Cfmcp|Y zo}=(wh36?eU*QD`FI0Gu!iyDNqVQ6Me^Gdu!oMo~o5IT#UZLP~jqlixsY~aEZdD3YRI|K;d$Q8!FsL;l>I# zQMjqX%@l5~a0`Vi6mF?-D}`eUw^n#Ph1)3HR^fIEudnb13b$8yLxnd|cw>b(QFv2@ zJ1D%F!W|XfT;Wa%cUE`{g|}3AD}}o#+*RRj3U^nyhr(Mcyp6)!DjZihp>R)yw^Mj~ zg?CVRM}>D%cxQ!oQFvE{cT;$Gh4)anm%@80yqChg72aFneH7kT;r$feU*Q83K2YI< z6h2ttLliz#;lmU@T;U@WK2qU63LmBL(Fz}<@UaRXr||I#pP=xG3ZJC#$qJvM@Tm%) zrts+spP}%X3ZJF$*$SVd@VN?~r||g-U!d@X3SXq~#R^}d@TCf0rtsy}`2Si5Iqom> z8(HaVQCB3NFKu<|O2b@bn5zwQjbW}e%yovjzMA>JTJ!J2rMK-3rUh;^%uR;5*)X>l z=2pYpW|-RzbBAI28s<*J++~=%4ReoS?lsJPhPmG`4;bb_!#re|e;MXs!#rY`e;eje z!#rk~#|`s@VV*S1Q-*okFwYp~S;IVMnCA`if?-}X%u9xO*)Xpd=2gSIW|-Fv^M+yG 
zG|XFudD}4W80KBWyl0sA4fBCvJ~YfnhUsURj}7yQVfq_pfMEt2W{_b%HOyy*`P?vH z80JgEd}Wxg4Kvs+W~yPP8D_d+W*BCsVSX~q&xV<0nAwJzW0<*ynP-^! zhFM^kg@##Vn8k)!Vwk0d`Nc5H4D+jDelyJSYUb|~1NoD93wR5xNeiUU7FHNtSZSCl z!>lq)MjbuJ%QQ@uVX_UAW0*RI$u&%#Vd@$t-!SzIQ(%}v!xR~&*f8}CQ(~A>!;~4O zfnmxG)6g)D4Aa;!O$^i2FwG3p+%PQ+Q(>5vhG}J(m|1>!S46~(SwlYi?!*n%FH^X!{Ob^3s zZJ2Egv#nv`hDjKvr(w1;%=U)a!7w`-W+%h!Y?xgPv#VitGtBPQ%$jKUpEb>&XN59- zl-t9!bT7l~X_&nX)7vn68)hHF>}#0)470ys4lvAthB?SE2OH)P!yIau!whq{VU94& zk%sAGn4=7Hv|)}h%&~?!&M?Ot<^;o>Xqb}>bFyJhG0drkIn6Mq8|DndoN1V|40E<& z&N0lnhB?nL=Nsk%!(3>XiwtwIVJ-Q8|Dtf^fk<#hPlfycN^v&!`y3_`wVlxVIDBdgNAv? zF#j^l!-jdpF#k5pqlS6RFpnGN3Bx>Tn5PW$v|*kx%(I4h&M?m#<^{vNXqcA_^Ri)H zG0dxmdCf4d8|DqeylI%X4D+^O-Z9L(hI!90?;GX=!+dC%j||h#FdrM{6T|ecX8zZ) zReGc{z|?7=VFnrIQ^R~_n9mLKg<-y|X4dWVe`V_QwP6MuW{6>i8fKVbh8t#tVMZEe zlwn32W{hFJG0a%Qj5Ex5!%Q&DM8ixn%(sU5&M@B_<_E+4Xqd@{nPQkq!%Q{IG{a0c z%nZZKG|W$i`PneD3^Us>a||=rF!KyE-!Kadv(PY$471oUOANErFuxdPnPGl4%x{KS zZkQE@S!tLm!>lq)M)g-X>3WxGm@LC&8z#pvbqtehm^{POHB7!?>KUfMFolLGGEA{y z>KmrSFr|hmGfV@+lpCg@VHz2xv0<7Rrm10?8K${mS{SCnFf9$!$}lm*v^LCohG}D% zwuWhEnDq^_fnnMkW<$elWSET&vx#9gHB1M?Y-X5_hS}UOoeb02Fk2XAOT%nsm@bCt zYM5?@>28=FhS}OM+ZbkB!^91fFicOwY-gD54YPw`b~MaRhS}LLyBKCy!|Z05-3_yc zVR{*6Ps8kGnBIoj+c5hWW?#eXXPEsBbAVwEG)(fV&Ghl&V8a|@m_rS7m|+e#%n^n; z(lC7tbCh9@Hq0@GIo2@88RmGyoM4y}4RexVPBzRbhB?(Rry1sS!<=E5GYxZ=Va_(p zIfgmcFy|TOe8XH|m3^&XO!;Cb{D8r04%oxLbW0GtdX_%i3^Rr=Q z8D_R&<``zKVdfcTzF`&^W}#sg8D_CzmKbKKVSX{pGQ<38nBNSu+%PK)v(hkChFN8p zj65CBXBsBUFxiI5F-#r9Y?%6nDKSi`Vag2Cz%b>8 zX=s>6hG}e=CWdKhm}Z7)ZkQH^sW41S!?ZF?%rLDDvz}qv7^baZ+8Jhj!)#!f_J-Ne zFdG?WW5aA>m`x4S!7!T{rlVmtH%uqPbT-TuhS}0ETN$Q{VY(Won_;>eriWp+Hq17L z+14;|!z2vT(=gi^W_!czV3-{Zvy)+VHq0)D+0`(+8D@9G>|vN*hS}3Fdl{y;VfHr6 zK8D%XF#8#1f5RMLm;()SkYNrs%prz3)G&t`=5WItVVENg)5kDJ8Rlri9AlVc4Rf4f zjyKE+hB?tNCmH5s!<=H6Qw?*PVNN&98HPF2FlQO&Y{Q&mm~#zto?*^6%ms$I&@dMn z=3>KKVwg(}bD3c-H_R1=xzaFK8RlxkTw|DP4Rf7gt~bmLhPlx&HyP$;!`xz+TMcuY zVQx3f9fs*^m^%$~mtpQU%sqy=*D&`P=6=IGV3-FD^N?ZwWtfKz^N3;oZJ0+5^O#{C 
zH_Q`;dD1XX8RlujJY$$=4fC8~o;S=3hI!F2FB#@#!@OdcR}J%;VO}@P8-{t)FmD;= zZNt1{n0F2Ho?+fM%m;?~&@dkvrk`OxHq0l6>2H_;h8bv>L5BI%FrOLbbHjXLm@f_U zm0`X%%wWR|G0afI3^UAd!;CP@NW+XW%xJ@mG0ZoH8Ecqvh8b^|35JCsfL+mnCXU@VVIeQ`N=Rp8)lYaW*cUXVdffUo?+%2W`SWA z8fKAU78_=XVU`-^7sD(w%&&&|%`nRiv%)Yd4O3;9Rffr^{uP+?{oG8$WEm#gFgb>) zW0+jSm zm>mqWqhWS3%+7|{#W1@XW;es^ZkRm`)5|b>8fGuU^ft`ihS|q3`x<6H!|ZRE0}OMZ zVUlA-`gn1$VGc3Op@uokFozrF2*Vs{m_CL%$}mS8<`}~qYnbB@_;W`TEDx9ZqU4`=%uBUK;!i5SKDO{{@eT7RDE>*Zp z;RXtqE8I}wMhZ7pxQW6|6>g?*bA?+dT%mAFgnYqu;kF95Q+R!aH&D2} z!W$~Qk-{4*yoth_D%?Tg%@ppa@a774Qn<6iTPVDx!dofaMd7XrcT>2#!aWq;TH$RJ z-d5qb!U=_YD!iS-+bg_-!aFLwlfpYIyokT+%Q)d=E@{fE8e(D;j0zCM&WA}zE0u)$KG8>Z&mJ#-eyU6 zcXwEn3U+rVc6VcUlZpZc-QCimbcaZXbc2F)81SO+UT1mT-!aZUXPiCGS$hos#i9b#J*&pWq(K3`)p|lR=^H9DB<;zgM z3Z+dbZI74qb%u6ft@fdG2&H2vokHmxN|#W&hSDvR?xFMurDrI;Lg^h!pHTXS(l3<$ zp$rIRU?_t^863)xP`(ajXeh%%86L`rP)3F_DwNTGl)pTp$0Qz`cwFLd5`UX`eBud- zCnlbhcyi(?iKix>mUw#N8Hr~mo|SlZ;yH=uCZ3o0yTtPof1h|k;)RJ9C0?9(N#do6 zmnB}FctzrsiGN7ED)H*XYZ9+byzUS4jJiIQ4WVodWm71dL)j9_)=;*EvOSa?q5K%i zPoeA#WmhOahq61AJ)!IkWnU=!Lpc!2!B7r`ayXPDq5Klcuc7=F%I~2Z4dqxU8FHle zpMQjMLMRzS$rMWFP_l%QHI!_Bl)pTmW>1_Wan8iK66a2wCvo1y`4Z<(Tp)45#Dx+U zPFy5$(Zt0P7f)OwammD`5|>U~CUM!sro;H-vIyC^v<2^YQY(J_7&qHUGM2yXB8d`pcf~*2K3ZZjku)#CIgV zGx1%C?@oMA;(HU{m-zm~4O~Bz`jSQ;DBW{7m9!6F-;u z`NRzqzmWLF#4ja&Iq@rrUrqd4;@1C2o|s zapES4nHG#De>gQQxZ>2JT39`#4{4lOg!tj|L1$# zzn6MEHv5keWNtGjl)0hI3+1~|=7;iqC<{Va7|Nnh7KgIrc=`L5{r4DwzaGCX{o|JU z%kk{8#LE+}NW3!f4~bVLUY&SN;w5*JKdC~@J$MG_ZHTr6?% z#3d4!Ok65)>BMCcmrYzQarwj*5?4%IDRJe*RT5WCTrF|+#5EG1nE0f`Cnr87@u`Vx zCa#sZcH%mT>n1)e@#%@rNL(**{lsS`J}dFriO)%VZsPM2pP%@GwE4fc zSNB-%!auGbM|T&6a&ah^gmP&pmxXe9C|4XW|9e~i-)(Kj_R25UCqsEEl&3>^CX{DGc`lUaLunYw3!%Ii%1fcV9Lg)9yc)`D zp}Zc-8=<@z%3Gnl9m+eQyc^1Up}Zf;2cdiz%15Cz3Z-!d^vM8pLTMXHyHMJP(jkztRb0}Rx=^9G6P`ZcG zBb1(@^a`bSD1Acd8%n=W`iC+gl!2iP3T1F8LqhpFl%b&v3uSmHBSIM&%BWCAhcYIV zv7w9$<(p8x4Q2fC^55-9=kM+Pdj$R-fxk!K-)98=U1#Xd2`7YS|B0bY3T1LAQ$m>< 
z%Cu0XhcY9SnW4-IWp*fYLYW)NyimRiWqv5%hq54)g`q48WpOA=LRlKhvQUEP&S9MC6uk9Yzt+3C_6&=F_fP|*%`{NP<{?& zcPM*8*&E8fQ1*v%Ae4il917)dC`Us1C6r%7`7M;+Lpd7Cu~0JPO3w-YBa{qEIAlp902DU_Q- zxh0faL%A)K2BF*@${nHH8OmLu+#Sk2q1+qFeWBbR$^)T17|KJTJRHg+p*$MOW1&1A z$`heH8Ol?kJRQn2p*$PPbD=yRO2bfI2<634UJB*qP+ke;)lgmw<@HeB2<6RC-U{XI zP~HjU-B8{O<^51T2<5|2J_@B#D2+pD5=zrhnuXFll#fICB$O7Rd>YDUp|lL8RVb}P z`8<>_LisY3uR>`PO50G{h0;Eh4xw}mrBf)KL+KJq*HF5J(mj+Oq4W%;S17$h=@Uxd zQ2K?^Ka>HX3=CyZD1$>863W-13=L&iD8oY;5z5F=Mujpulrf=<4P{&?--PmQDC0w! z5X!_*CWSINlqsQ14P{y=(?gjN%FIw^g)%#oIibuAWnL)Xg)%>s??YJ-%EC|$q`D)N4W(Ep#X~6(O36@4g;F|{GNF_WrCccGL#Yr-#ZW4RQaO|=p;Qf}S}4^+sS(PF zp_~-T$)TJQ%Bi8$45d~mwL_^BO5IRS3+41s&IqMmDD^`*GnBJJIXjedLOC~-^Fld4 zlnX++FqDfzxj2+dLb)`Q%R;$4lq*8HGL)-AxjK|q5Cclp8|1F_fD^xjB?u zLb)}R+d^p&%I%@t5z3vR+!e~*q1+S7y`kI}%Kf1{5Xyt0JQT{qp*#}GqoF(&%HyFt z5z3RHJQd2*p*$1Hv!Of}%JZQ#4CRGTUJT`>P+kt@l~7&{<+V^=59N(e-VEifP~HyZ zolxEl<-Jhe59NbUJ`ClfP#T5OIFu%#G!3O$D9uCpIFwI9X%Wh&p?nrf%TQW{(mIsS zL-`_zWrP=RB~vJwL&*|K)=;vAl0B3hq2vrDS17qd$rDQ6Q1XS6Ka>KY6bz+MD1}2Q z5=zlfiiJ`c8B9tpbxhj;aL%AlDYeTs%lPUp*$VRGod^i%5$MSA4jokQsoO4m@jh0;Bg9-;IMrB^7uL+KMr-%$F6 z(m#{|p$rUVP$+{#84}9Zp$rXWSSZ6o84=3JP)3C^I+QV?j16U6DBpzgZ7AbInGnjv zP$q>kIg}}(ObumPDAPlk5z5R^W`#04lsTcy4P{;^--R+ilzR-a-f_j7s`$Dpu8v_%8v@5f~XKGjEbP5s2D1aN}!Uc6e^9%pt7hODvv6lil`E* zjH;ljs2Zw{YM>L*N$6yB3OW_lM72T4HmEIXhuWhKs3YoxI-@SAE9!>2qaLUy>ViXg>NL zEkFy=BD5GSK}*pxv>dHKE71>V6OM@P{ylp!DIX3z;JBg%v_qbw*Z%7(I|94IHsg>s`jC@;!~@}mN%AS#3kqavs% zDu#-q5~w69g-W9`s4Oan%A*RXBC3QcqbjH>s)nkg8t6oH5;_^3f=)#>Q7u#()j@U9 zY3Oux2C9eZqchQ2=xlTjIv1UX&PNxZ3(-aBVsr_*6kUccM^~UL(N*YbbPc)|U5Bnm zH=rBQP3UHH3%V8Eh8i5#zwiFa% zdI!CW-b3%B5739`Bh&~rMomyt)C@I8AEQrD3-l@a47Eh9P;2x#`T~84zCvwKThtD< zM;%Z{)CqM)T~Jrl4RuF7P*2ng^+tVAU(^rvM+4A6GzbkwL(tb~C>n-_qY-E%8ihuq zF=#9rhrU7IqVZ?~ns{8t-`Dj&UNN7<(#dEFntELS`M%)qHU2$D;9s}!G;YP|Xa<^z 
zW}(?=4w{SRq3_Uq^gUXD7NSLHFok0Kcb(|PP7aCjCP|vXfN7__M-#nAUcE&qa)}S^eg%e{f>^JV<$YVlndoXc~D-I59LP%P(f4(6-Gr+QB({SMWui=p=M9It87IYNA@GHmZZ_qSMgn=nPa3)jzI(kA2(LG9MsyRp8Qp?zMYo{_=yr4mx)a@n?nd{Zd(nO9e)Ir(5Iuw* zMvtIJ(PQXw^aOemJ%ye|&!A_~bLe^05WRq2L@%M2(JSax^cs2{y@B3DZ=tu*JLp~X z9(o^rfIdVYp+=}NYJ!@gW~e#(7=40Tpij|fs3mHJTBFa=7wAj$6>5XpqIRe~>VP_; zPN*~Lg1Vw^s5|O`dZJ#aH|m4>qJF498h{3(LC5vqV~_n`x0V0es}5!?hM=#}P&5n; zMoXJEdpo}OJ%8at0tSB4Gj&h)! zC>P3&@}RsZAIgskpn|9nDvXMtqNo@uj!K}Cs1z!V%Am5S94e10po*vxs*I|js;C;O zj%uJ2(MjlJbP75Z)kL*WZBz%7s*lb@XQ8vvIp|z;9y%XgfG$KAp^MQa z=u&hUx*T1Bu0&U%tI;*+T67({9^HU$L^q+E(Jkm!bQ@}bZbx^ZJJDU}Zgda27u|>M zM-QL}(L?BA^ay$sJ%%1fPoO8!Q|M{*40;wlhn`0b(F^EB^b&d*y@Fmvuc6n`8|Y2+ z7J3`KgWg5&q4&`T=tJ}oYJ?i2Ca5WDhMJ>~(I=<{`V@VJTB25{HToQVfxbjvp*E;3 zYKPjR4yYsQggT=xs4MD*x}zSbC+dZIqdurF>WBKH0cao^ga)G_=xa0-4MW4x2s9Fn zLZi_bG!~6R-=J^Ncr*b`M3c~DGzCpX)6jG@1IOM@P{yl%XK=KXd}hh%%wfC=1GpvZ3rK2g-?Zq1-4B z%8T-${HOpbhzg;?s0b>GilO4D1S*M2q0*=fDvQdY@~8r;h$^AVs0ylzs-fzr209U) zgic1Mpi@yzR14Kcbx>V&8af@Ff$E|9=uC7LIvbsX&PC^;^U($9LUa+j7+r!cMVFz= z(G}=QbQQW9U4yPg*P-jt4d_O66S^7Qf^J2(p$6!7bO*W<-G%N(_n>>xedvDl0D2HT zgdRqZphwYT=yCJ}dJ;W_o<`50XVG)$dDIZSfL=r|p_kDs=vDL@dL6xi-b8Pqx6wQ3 zUGyG$AANv6L?59>s4;4SnxbZ?IrVbNqUZ^+fgZiR=s6QHj2BJY|FdBlsMnlmsG#rgUBhe@{8jV3? 
z(Kz%C`WB5x6VOC72~9>*&{Q-HO-D1(Of(D4Msv_yG!K1;=A-Y?0<;htwrn5db9y;M4QlNv;}QN+t7Bj1O148LOane^fTIx_Mp9JAKH%& zpo8cTI*g8>U(m1UH}pF?ijJWSg_!}O6HrE!31voEP*#);Wk)$sPLvDfMtM+Pln>=c z1yDg$2o**}P*GG26-OmdNmL4zMrBZ0R1TF#6;MS~2~|c_P*qe7RYx_@iRdJBGCBpF zifW=-s5YvD>Y~%o>F5kp57kF!qO;K1=p1w|IuD(XE)+&FB_%E4mFeK)0hi(4FWmbT_&O-HYx+_oD~UgXkgjFnR<% ziXKCcqbJal=qdCxdImju5Zp>NQ) zXgr#LCZb7bGMa*>qG@P4nt^7bS!gzzgXW@n=sPqYeUBEPg=i64jFzCKXc=0LR-l#W z2eb;UMr+Vov<|ID8_-6y32jDO&{nh!ZAUxMkLV|~6YWAjqupo^+Kcv~{pbKXhz_B{ z=m`1+{fd4=zoVn*7|KwD`5!s~Wki`!W|ReGMcGhxlmq2NxlnGD2jxZiP<~VZ6-0$l zVN?VaMa58YR05SmrBG>929-tSPBbRoJ3U5qY4m!iwi<>(4@CAtb-jjlo0 zqU+H0=mvBnx(VHkZb7%A+fV~^JGukiiS9yoqkGW3=st8mdH_9$9zqYJN6@3_G4wck z0zHYILQkV-(6i_{^gL>aUO+FRm(a`T74#~44ZV)uKyRYA(A&rLU-w-dfB&Wboe}ug zH=De}ZSpR95512*Kp&!yP$SeBH9<{LGt?Y?j6Ojv(5L7#)DpEqtz8}&hbQ9sll4L}3YAT$^aL0_YxXc!ufMxc>s z6dH}jps{Ei`UZWA#-j;nBASFIqbX=AnueyM8E7V&g=V8UXfB$EzC-iT_h(F|%0c}K^&}Os+ZAIJAcC-Wih<-vl(Ju5e+Ku+0 zy=WiWj}D-N=ny)Lj-X%Aujn`QJ35Mvp$tWtccT+fMwAIP+3$Cl}8m&MN|n@MpaN%R1H-}HPDIZ zBy=)51)Yj&qFSgns)OpH)6nVY3{(%*M`xn5(Anr5bS^p%osTX+7ov;M#pn`rDY^_@ zj;=sgqN~u==o)k_x(;2BZa_Dpo6ybZ7IZ7R4K+ZwqdU-@=q_|Ox(D5h?nC#Z2hfA) zA@neM1U-r#`=@HxtXr?)_29Jf9}=(flfdfQ6`ib zWkFd{Hk2LZKsiw^lpEzic~L%;9~D3aQ6W?q6+uN&F;pCtKqXNrR2r2*Wl=d)9#udU zQ6*FvRY6rzHB=qdKqsP;(8=f&bSkQeYN6Vw4yyZ4)v8&$Zo|{i>F5kp57kF!qO;K1 z=p1w|IuD(XE)+&FB_%E4mFe zK)0hi(4FWmbT_&O-HYx+_oD~UgXkgjFnR<%iXKCcqbJal=qdCxdImj>eOqxEt%a8+{<*j7^v}!qQ=L&4)D?9@-BAzJ6ZJy9Q6JP7^+Wy905lK{LW9u|^felahN0nT z1R9A(q0wjz8jHrEZ_u}BJeq(eqDg2nnu4aHX=pl{fo7svXf~RI=AwD%J2W4Cj~1YX zXc1bBmY}6*8Cs53pq1zcv4y{KU&_=WgZAM$rRJ#6ihe`Cqoe2;%21qFOwkD_Bg%v_qbw*Z%7(I|94IHs zg>s`jC@;!~@}mN%AS#3kqavs%Du#-q5~w69g-W9`s4Oan%A*RXBC3QcqbjH>s)nkg z8t6oH5;_^3f=)#>Q7u#()j@U9Y3Oux2C9eZqchQ2=xlTjIv1UX&PNxZ3(-aBVsr_* z6kUccM^~UL(N*YbbPc)|U5BnmH=rBQP3UHH3%V8Eh8m#T(H-bcbQiiC-GlB$_o4gI z1L#5Y5PBFrf*wVWp~uk^=t=YxdKx{0o<+~0=TSrS0(ud>gkDCkpjXjr=ymi4dK0~c z-bU}BchP(3ee?nP5PgIip~k2QYKoen=ICSe32K2pMW3OTs1<6BK1W}mFVR=14Qh+p 
zq4uZ)>WDg_&ZrCOin{$%f1VfUj(VV;s2A#u`k=n3AL@?=pn+)6as5}#n*aVg|7Roc zuXolD{^xD<=UIm#=xa0-4MW4x2s9FnLZi_bG!~6R-=J^Ncr*b`M3c~DGzCpX)6jG@ z1IOM@P{yl%WLk zaC8F7h%%wfC=1GpvZ3rK2g-?Zq1-4B%6nXY{SMK5EX|J!pn|9nDvXMtqNo@uj!K}C zs1z!V%Am5S94e10po*vxs*I|js;C;Oj%uJ2(MjlJbP75Z)kL*WZBz%7 zs*lb@XQ8vvIp|z;9y%XgfG$KAp^MQa=u&hUx*T1Bu0&U%tI;*+T67({9^HU$L^q+E zkL&;JnCf5u`(LkN-SW@N`19EOR&*O`fNn>3pgYlB|J0wyZ+D}6(7otBbU%6kJ%}Dc z52HuWqv$d8IC=s-iJn4Fqi4{w=sEN}YKUGyFQS*w%jgyKDtZmQj^03TqPNi7=pFPf zdJnyiK0qI$k5D7j7&SpnQ8UyWeT+UqEzqavGt?5bLaou~=nM2E`UVKs`|})Eo6deNjKu9}PeQ(I7M!4MAU{p=cNyjz*x7XcQWa#-Ooi z9Qp=*i^ih~Xd;?~CZj26Dw>9-qZw!>nuTVgIcP4LhrUDe(f4QpT8I{*#b^myik6|} zXa!n{en6|xYP1HeMeERdv;l2Io6u&o1#Ly!&~~%~{fK@-JJBxmGun;zpuK1x+K&#P zgXj=CjE^gB9=j-d=CnMtA(P)3vqWky+0R+J58M>$YVlndoXc~D-I59LP% zP(f4(6-Gr+QB({SMWui=p=M9It87I zYNA@GHmZZ_qSMgn=nPa3)kkNdv(VY-9CR)^51o%LKo_Em(8cHybSb(FU5>6mSE8%X z)#w^@ExHa}k8VIWqMOjo=oWM4y{KU&_=WgZAM$rRJ#6ihe`Cqoe2;%20~=A36bLM43=#lm%r)*-&Kl||)Hc~k*aM3qowR0UN<)lhX*1D%LYLMNkB(5a{4V{k8K=n|4bS63rosG^x=c4n_`RD?4A-V`%j4nZ!qRY_b=n8Ztx(Z#5u0hwL z>(KS+26Q933EhltLARpYPy=*3x&z&b?m~B?d(ge;K6F2N06mBvLJy-y(4*)v^f-C~ zJ&B$|Pornhv* ztwrn5db9y;M4QlNv;}QN+t7Bj1O148LOane^fTIx_Mp9JAKH%&po8cTI*g8>U(m1U zH}pF?ijJWSrJ4Vs6HrE!31voEP*#);Wk)$sPLvDfMtM+Pln>=c1yDg$2o**}P*GG2 z6-OmdNmL4zMrBZ0R1TF#6;MS~2~|c_P*qe7RYx_@iRdJBGCBpFifW=-s5YvD>Y~%o z>F5kp57kF!9@oF#o%rwhuYY|a$6TOArM(?0^(R=89 z^uclc^%E2yvh*X=2sK7cP*ct6h8__1T8ErvZ(KfUl?La@GpU_UU3;m3CqdjOZ+K2X| z1Lz<+gbt%4=oj=W`VIY#j-q2ILm6hM=meAzWkQ)z7L*lbL)lRdloRDbxltaJ7v)3w zQ2|sC6+(qk5mXcvL&Z@ER1%d!rBNAF7L`NgQ3X^HRYH|f6;u^fL)B3YbRs$los3RF zr=psu7OIWvpt|TZbUHc%)kF2sndmHZHaZ8Li_SymqYKc5=puA6x&&Q{E<=~2E6|nb zDs(lv23?D;L)W7l(2eLObThgI-HL8Q4bbiA4s<8F3*C+GLHDBj(EaEE^dNc&J&Ybf zkD|xWt6h8__1T8ErvZ(KfUl?La@GpU_UU3;m3CqdjOZ z+K2X|1Lz<+gbt%4=oj=W`VIY#j-q2ILs{m3=meAzWkQ)z7L*lbL)lRdloRDbxltaJ z7v)3wQ2|sC6+(qk5mXcvL&Z@ER1%d!rBNAF7L`NgQ3X^HRYH|f6;u^fL)B3YbRs$l zos3RFr=psu7OIWvpt|TZbUHc%)kF2sndmHZHaZ8Li_SymqYKc5=puA6x&&Q{E<=~2 
zE6|nbDs(lv23?D;L)W7l(2eLObThgI-HL8Q4bbiA4s<8F3*C+GLHDBj(EaEE^dNc& zJ&YbfkD|xW4CMM1#;^Gz5K(hN59;I2wUQqETox8iU57 zap)WLEgFv|powS_nvABPsc0ISj%J{lXcn4{=AgM~9{LW=N8h6bXdzmJ7NaF-DO!e> zqZMc+`T?y%tI-;?7Og|;(FU{;Z9<#T7PJ*@L)*~~^dtHS?L@oK&uBN=gZ83*Xg@lD z4x&ToFgk*MLBFEk(C_FdI)*ZoWB!LuKp9aclo@3~Sy48W9pykdQ7)7lr9Z*Nq33WzY zP*>Cqbw@o=Pt*(bMtx9U)DQJX1JFP;2n|L<(AQ`v8it0W5ojbDg+`+>Xe=6szCquj z@n`~?h$f-QXbPH&rlIL*2AYXxq1k8-nv3S4@6deoJz9VkqD5#iT7s6MWoS8CfmWg) z&?>YVtwC$iI9zdThTVO9qm9rqMy)Cv5EVj& zQ4v%W6+^{Q2~-l5LZwj|R2G#(Y%#lG;}&T1Jy(I(V6HhbT&E%or}&x=c5bIh3F!5F}eg@iY`N!qbtyr=qhwIx&~c~ zu0z+O8_K5nLk-aF=nixzx(nTn?m_pW`_TR90rVhx2tAA*L64%x(BtR{ z^yG2<^%HAPvGi&540;wlhn`0b(F^EB^b&d*y@Fmvuc6n`8|Y2+7J3`KgWg5&q4&`T z=tJ}oYJ?i2Ca5WDhMJ>~(I=<{`V@VJTB25{HToQVfxbjvp*E;3YKPjR4yYsQggT=x zs4MD*x}zSbC+dZIqdurF>WBKH0cao^ga)G_=xa0-4MW4x2s9FnLZi_bG!~6R-=J^N zcr*b`M3c~DGzCpX)6jG@1IOM@P{yl%WDMKy(7ih%%wfC=1GpvZ3rK2g-?Zq1-4B%8T-${HOpbhzg;? zs0b>GilO4D1S*M2q0*=fDvQdY@~8r;h$^AVs0ylzs-fzr209U)gic1Mpi@yzR14Kc zbx>V&8af@Ff$E|9=uC7LIvbsX&PC^;^U($9LUa+j7+r!cMVFz=(G}=QbQQW9U4yPg z*P-jt4d_O66S^7Qf^J2(p$6!7bO*W<-G%N(_n>>xedvDl0D2HTgdRqZphwYT=yCJ} zdJ;W_o<`50XVG)$dDIZSfL=r|p_kDs=vDL@dL6xi-b8Pqx6wQ3UGyG$AANv6L?59> zs4;4SnxbZ?IrVbNqUZ^+fgZiR=s6QHj2BJY|FdBlsMnlmsG#rgUBhe@{8jV3?(Kz%C`WB5x6VOC7 z2~9>*&{Q-HO-D1(Of(D4Msv_yG!K1;=A-Y?0<;h3pgYlB=x%fmx)M^B(9(NpMY z^bC3yJ%^r04bcneMf4JS8NGsDMX#aP(HrPZ^cH#>y@TFG@1gh62k1le5o&}Qqb8^+ zYKEGlkI^Tn1^N_yhFYRls5SZ=eSyA2U!gXr?H|gTAw!0`wd&WcQ?qWvc4=szh7M`y zn1)Vi=$wWwY3Q1UZfWSAh8}6?nTB3z=$(c>Y3Q4Verf2Rh5>09n1(@V7@US7Y4|z~ zL(?!U4a3tgA`K(cFe(kB(=a9tW79A$4d0~U+cb<%!-OVM7`=reRYWHm6}r8n&ikTN<{fVMiK%Ov6uU*qMf1Y4|w} zyVI~I4SUnDFAe+Ca3Bo_({LyahtqH*4Zoz}*EIZ=hTqe0G!4hnkfBntTl+^EPDn$> zG-OIc<}_qUL)J88OGEZFxoJ2r4d5XpqIRe~>VP_;PN*~Lg1Vw^s5|O` zdZJ#aH|m4>qJF498h{3(L1-`WX~d(l3$A00pk(IIpg9YMdKU(s*q zcXSjTLm4Xn^~mZU=meAzWkQ)z7L*lbL)lRdloRDbxltaJ7v)3wQ2|sC6+(qk5mXcv zL&Z@ER1%d!rBNAF7L`NgQ3X^HRYH|f6;u^fL)B3YbRs$los3RFr=psu7OIWvpt|TZ zbUHc%)kF2sndmHZHaZ8Li_ZI}{`{qY^U($9LUa+j7+r!cMVFz=(G}=QbQQW9U4yPg 
z*P-jt4d_O6({cUBryTzrnVVU93%V8Eh8m#T(H-bcbQiiC-GlB$_o4gI1L#5Y5PBFr zf*wVWp~uk^=t=YxdKx{0o<+~0=TSrS0(ud>gkDCkpjXjr=ymi4dK0~c-bU}BchP(3 zee?nP5PgIip~k2QYKoen=ICSe32K2pMW3OTs1<6BK1W}mFVR=14Qh+pq4uZ)>WDg_ z&ZrCOin^ihs0ZqadZFH^59*8hq5fz98i)p=!DtBj8VyCm&~P*YjYOl+Xfy_mMdQ#n z=vy=%O+XXTBs3XKK~vE*G#$-AGtn$G8_hv;(LD4WnvcFm3(!Ke2rWiS&{DJvEk`TR zO7sI-g;t|AXf0ZY)}sw*Bie*Eqb+DF+J?5H9q32&6WWP(p`X!iv^c0qZ`nT=q7YCx&_^eZbJ>w?dT44C%Wqo{q_BgcchY4ezJngEV}YhL6(FC=HF% z&?F5_)6gsp&C~F48a_!wi!^+ihR@Q_G7YWL&^isDr{RkKcgMZ?f=Gy;u8qtNI-^w;P8F=-f^hH+{5CJo=FVSE}Uq+wzjCZ%C=8m6RS zY8s}cVR{;7q+w9zdThX>Z^w-D2_B8BB!;fkBDGfW* zuqzEer(t&*_M~BN8uq1Oe;N*?;b0mLrQvWIj-=t2H2j)|-_r1V8jhynSQ;`^O>UQe zq~U}#WK2V*G-OUgmNaBdL$)+zPeYD04CMM1#;^Gz5K(hN59;I2wUQqETox8iU57ap)WLEgFv|powS_ znvABPsc0ISj%J{lXcn4{=AgM~9{LW=N8h6bXdzmJ7NaF-DO!e>qZMc+`T?y%tI-;? z7Og|;(FU{;Z9<#T7PJ*@L)*~~^dtHS?L@oK&uBN=gZ83*Xg@lD4x&ToFgk*MLBFEk z(C_FdI)*Y-`|G^wALs;>5oJP|Q5KXHWkcCf4wMt+Lb*{Mlo#bg`Twau&n^}~1yLbX z7!^T9Q882;l|Us?DOCCoW&O*yW|T=o*))_(L-{mRNJGUmR7ykTG*n4L)ihK~L-jP& zNW+O~I4KP$r{R<|oSKH3X{eQk+G(hhhPr7uEe)rq;fyrYOGEuMoSBBR(r|Vf&Pl_$ zX*e$p=cnOhY4ezJngEV}YhL6(FC=HF%&?F5_)6gsp&C~F48a_!w zi!^+ihR@Q_G7YWL&^isDr{Rk2izhVf~bkcNqAn3RUeX_%6RscD#&hUsaTk%pORn3aavX_%9S zxoMb}hVRlaKMmifVL=)ereRSU7N=oJ8kVMESsIq7VMQ8Nrs0P)tV+Y`G^|O(+BB?7 z!}>qM|9Hi31KNl-q0MLu+KRTJ?Pv%35&eXAqFv}`v>WX~d(l3$A00pk(IIpg9YMdK zU(s*qcXSjTLm8_7^%naFIss)wnNVhw1!YCqPOgg&dQg3+0n`v`1T}`5 zKuw`$P;;mS)Dmh1wT9Y2ZJ~Bhd#D4{5$XhWhPpsqp>9xjs0Y*&>ILFfFj0_k?1q`DDhA{!d*nnYNz%V{wm=G{b3>YQ_ z43h(fDFMUOfMHs|Fg;+H5iraQ7-j_wvjc`X0mIyYVP3#6KVVo8Ff0rh76lB81BN95 z!_t6ZS-`M7U|8|r;s5{K=}Kr7v>I9it%cS>>!A(*tN;0I-$rN?v>Dn0ZH2Z$+o2uM zPG}dj8`=Zyh4w-Fp##uC=n!-mIszSqjzPzv6VOTM6m%Lo1D%D=LFb_h&_(DHbQ!t= zU4^be*P$EGP3RVM8@dDCh3-N3p$E`I=n?c7dICL#oy1LcMCLHVHqP(i2= zR2V7(6@`jH#i0^VNvIT58Y%;og~~zYp$bq%s1j5essdGoszKGE8cH>9zxlzLGz&n&_ZYtv=~|fErpgr%b^v}N@x|d8d?Lbh1Nmqp$*VR zXcM#<+5&Bbwn5vW9nel_7qlDN1MP+OLHnTt&_U=BbQn4U9fgiT$DtF@N$3=G8ae}= zh0a0ep$pJO=n`}px&mE=u0hwK8_-SY7IYiB1Kox0LHD5t&_n1E^cZ>qJ%ye@&!HF4 
zOXwB!8hQi0h2BB$p%2hU=o9oA`T~80zCquiAJ9+e7xWwY0|iQf^B)u#3IYX%fbcN&+Q?l0nI#6i`Yi6_grE1Eq!1LFu6kP(~;dlo`qbWreaq*`XXzPAC_Y z8_EOah4Ml9p#o4rs1Q^bDgqUSib2Jp5>QE~6jT~21C@o!LFJ(eP(`Q`R2ixQRfVcS z)u9?tO{f-B8>$17Env571SDP1GR(PpB8v8|nk~h5AALp#ji9Xb?0Q8UhW4hC#!j5zt6z6f_zd1C52o zLF1tb&_rkwG#Q!#O@*dG)1evAOlTG~8=3>nh2}x?p#{)FXc4p+S^_PFmO;y*70^m( z6|@>!1FePDLF=In&_-wzv>Dn0ZH2Z$+o2uMPG}dj8`=Zyh4w-Fp##uC=n!-mIszSq zjzPzv6VOTM6m%Lo1D%D=LFb_h&_(DHbQ!t=U4^be*P$EGP3RVM8@dDCh3-N3p$E`I z=n?c7dICL#oy1LcMCLHVHqP(i2=R2V7(6@`jH#i0^VNvIT58Y%;og~~zY zp$bq%s1j5essdGoszKGE8cH>9zx)%D#{-5F0mI3F;Z(qII$$^xFq{n-&IJtT1BMF$ z!^ME%QowLIV7L-6Tn!km1q{~%h8qFH&4A%nz;HWYxDznk4H)kIclf{Gu7n;y51~iU zW9SL=6nX|dhh9K0p;ypr=neE1dI!CSK0qI#Pta%R3-lHG27QNqKtG{h&~NAu6eyMc ziTxK87zzRfg@QrBp%73=C=?VL3Im0O!a?Do2v9`mFDMce8Hxf$g`z>xp%_q1C>9hO ziUY-k;z9AD1W-aK5%l+eq~8Yo2Uq`v5<^L#q);*_Ig|oQ38jKkLusJ2P&z0*lmW^J zWr8w8S)i;?HYhuk1Ih{If^tK7puA8%C_hvHDhL&V3PVMpqEIoYI8*{E36+9MLuH_{ zP&uePQ~|08Re~x*RiLU+HK;mN1F8wtf@(u`pt?{!s6NyHY6vxg8beK>rcg7eIn)Ab z3AKV+Lv5h8P&=qS)B)-Ub%HuWU7)T|H>f+*1L_I&f_g)JpuSK)s6R9S8VC)7217%j zq0lgBI5Ywp35|kALt~(^&^Ty3Gy$3jO@byvQ=qBPG-x_B1DXlVf@VW=pt;aIXg;(6 zS_mzI7DG#*rO+~HIkW;=39W)wLu;V5&^l;6v;o=(ZGtvKTcEAbHfTGv1KJ7gf_6iD zpuNyOXg_oSItU$t4ns$vqtG$vICKI!37vvYLua6~&^hQlbOE{uU4kw{SD>rVHRw8Y z1G)*_f^I{1pu5mL=sxrSdI&v&9z#!{r_eL#IrIX03B7_|LvNtB&^zco^a1(^eS$ti zU!bqhH|RU`1NsU5f__7Ppg^f{0)zrXL7<>eFeo?_0tyL*fzjQJ|<$G$=Y01Bwa7f?`8)ptw*xC_a<`N(d!_{)Ya6{)G}lNuZ=qGAKEe0!j&` zf>J|iptS$1|9QJ|Iw(Dq0m=wvf-*x{psfF^|9NjfHYhuk1Ih{If^tK7puA8%C_hvH zDhL&V3PVMpqEIoYI8*{E36+9MLuH_{P&uePQ~|08Re~x*RiLU+HK;mN1F8wtf@(u` zpt?{!s6NyHY6vxg8beK>rcg7eIn)Ab3AKV+Lv5h8P&=qS)B)-Ub%HuWU7)T|H>f+* z1L_I&f_g)JpuSK)s6R9S8VC)7217%jq0lgBI5Ywp35|kALt~(^&^Ty3Gy$3jO@byv zQ=qBPG-x_B1DXlVf@VW=pt;aIXg;(6S_mzI7DG#*rO+~HIkW;=39W)wLu;V5&^l;6 zv;o=(ZGtvKTcEAbHfTGv1KJ7gf_6iDpuNyOXg_oSItU$t4ns$vqtG$vICKI!37vvY zLua6~&^hQlbOE{uU4kw{SD>rVHRw8Y1G)*_f^I{1pu5mL=sxrSdI&v&9z#!{r_eL# zIrIX03B7_|LvNtB&^zco^a1(^eS$tiU!bqhH|RU`1NsU5f__7Ppg?JG{(}NTL7<>e 
zFeo?_0tyL*fzjQJ|<$G$=Y01Bwa7f?`8)ptw*xC_a<` zN(d!_{)Ya6{)G}lNuZ=qGAKEe0!j&`f>J|iptMjrC_R(`$_QnGGDBIQtWY*6JCp;; z3FU%vLwTUQP(CO>Q~)Xn6@m&wMWCWkF{n6H0xAiWf=WYWpt4Xos612wst8qrDnnJE zs!%nkI#dIy3DtsXLv^6KP(7$V)BtJ-HG&#LO`xVwGpIS#0%{4hf?7juptevus6EsH z>Iij$IzwHcu246qJJbW}3H5?{Lw%sWP(P?YGyob14T1(kL!hD1Flab50vZX8f<{AQ zps~<6Xgo9lng~sTCPP!8sn9fNIy3{C3C)6LLvx_H&^%~9v;bNNErJ$9OQ5CDGH5xp z0$K^Jf>uLoptaCCXg#z6+6ZleHbYyWtJM;tk3H^e8 zLw}$^X>tC80z*NdpinRP&g<&6ak6|{RKsWB12K2s8BR0Iurwn z3B`h9Lvf(EP&_C;lmJQyC4&Bj{(=655<^L#q);*_Ig|oQ38jKkLusJ2P&z0*lmW^J zWr8w8S)i;?HYhuk1Ih{If^tK7puA8%C_hvHDhL&V3PVMpqEIoYI8*{E36+9MLuH_{ zP&uePQ~|08Re~x*RiLU+HK;mN1F8wtf@(u`pt?{!s6NyHY6vxg8beK>rcg7eIn)Ab z3AKV+Lv5h8P&=qS)B)-Ub%HuWU7)T|H>f+*1L_I&f_g)JpuSK)s6R9S8VC)7217%j zq0lgBI5Ywp35|kALt~(^&^Ty3Gy$3jO@byvQ=qBPG-x_B1DXlVf@VW=pt;aIXg;(6 zS_mzI7DG#*rO+~HIkW;=39W)wLu;V5&^l;6v;o=(ZGtvKTcEAbHfTGv1KJ7gf_6iD zpuNyOXg_oSItU$t4ns$vqtG$vICKI!37vvYLua6~&^hQlbOE{uU4kw{SD>rVHRw8Y z1G)*_f^I{1pu5mL=sxrSdI&v&9z#!{r_eL#IrIX03B7_|LvNtB&^zco^a1(^eS$ti zU!bqhH|RU`1NsU5f__7Ppg`$x{(}NTL7<>eFeo?_0tyL*fwRk8NXI7fizMko`M8Oj1>g|b1}p&U?7C>N9)$^+$v@Ou9P22ew&5!4uJ0yTx2LCv8SP)n#4)Ea67wT0S2?V%1(N2n9j8R`Ocg}OoA zp&n3As29{5>I3zK`a%7n0nk8b5HuJX0u6LCc{P&`M|(v>I9it%cS> z>!A(MMraeX8QKDEg|pg|0!@p&QUm=oWMvx&z&X?m_pV2hc<45%d^(0zHME zLC>KV&`anQ^cs2ty@lRE@1YOSN9Ysu8TtZ!g}y=Gp&!ss=oj=G`U3?@|9?(^|8wvC zUr=Bu2ow|w1_g&gKp~+}P-rL&6c!2xg@+sI#fK6=386&L-_SqMzffW*36vB{1|^45Kq;YAP--X*lom<{rH3*=8KF#2W+)4k z70L!>hjKtUpIe0Q20#O$LC|1m2s9KL1`UTs zKqH}1&}e83G!_~MjfW;c6QN1aWM~RB6`BT3hh{)Cp;^#uXbvx z=nixjx(D5d9zYMFN6=&F3G@_t20e#fKrf+J&}--o^cH#ty@x(PAE8gsXXp#`75WB! 
zhkigmpO_6bc3fheALhp-@n0C=3)93I~OUB0v$Lzo1A^WGD(0 z6^aH$hhji6p;%CCC=L`CiU-Aq5lo!ee<%bGD1))MvVWsVDhHK^DnJ#XN>F8}3RD%U233b@KsBLSP;ICVR2QlT)rT5D4WUL*W2gz# z6lw-Fhgv`_p;l09s14K>Y6rE4IzSzvPEcp43)B_r26cyeKs}*eP;aOY)EDXp^@j#P z1EE3CU}y+56dDE%hekjnp;6FiXbdzK8V8MsCO{LRNzi0y3N#g(22F=%Kr^9P&}?W9 zG#8o&&4(613!z2OVrU7p6j}x?hgLusI#fK6=386&L-_SqMzffW*36vB{ z1|^45Kq;YAP--X*lom<{rH3*=8KF#2W+)4k70L!>hjKtUpIe0Q20#O$LC|1m2s9KL1`UTsKqH}1&}e83G!_~MjfW;c6QN1aWM~RB z6`BT3hh{)Cp;^#uXbvx=nixjx(D5d9zYMFN6=&F3G@_t20e#f zKrf+J&}--o^cH#ty@x*hNB`#=Yyb0E%8$7E3Hl6ufxbfDpzqKR=qL0G`VIYo0%gJp z5DE+hfr3K8px{slC?pgL3JryU!b0Jo@K6LOBJ>v&35pCwfucgupy*HxC?*sOiVekq z;zIGD_)r2UA(ROE8~O+O7fK8zfs#VWpyW^rC?%8%N)4rf(n9H=^iT#UBa{is3}u0` zLfN3~P!1?3lncrY<$>}-`Jntz0jMBU2r3K}fr>)KpyE&os3cShDh-u^%0lI!@=yh+ zB2)>g3{`=uLe-$^Pz|UiR12yN)q(0l^`QDt1E?X?2x<&9fto_ipyp5us3p`2Y7Mo4 z+CuH1_D~0?Bh(4%40VCJLfxS5P!Fgl)C=kj^?~|A{h&qO+AlF}scFkhTW;D4(^i_c%Cyy{tubw_Y3oc|Z`uaaHk!7{ zw9Te%F>R}9+f3VT+78opnzqZd-KOm^ZLewjOxthT0n-kecF45DrX4ZusAy+os(y?XGF} zOuKK|1JfRw_Qfkfwz)EwpK2 zObcsTIMc$L7QwWLru}7FB-0|B7R9uvrbRO?x@j>?i)mUc(_)(z$F#Vn#WO9wX$eeA zXj&rE{xR=~7^rWG=+uxUk1D{5LX(~6r`!nBg6 zl`^fgX=O|+Yg#$e%9~cfw2G!xGOe;{RZOdDS~b(En^wcLnx@qX>CkvYg#+g+MCwFw2r2AGOe>|T}NF=nWoJ$ZMJE1Oq*-kJk#czw!pN7 zrY$mUv1v<8TWZ=e)0UgI!nBp9tuk%3X=_YdYuY-~)|Uw>YuY>0-kbKpw2!8JGVQZzUrhUI+Beg_oA$%B zpQimX?YC)vObe97JpVH-uxUX|3u;<0(}J58!nBa4g)%L)X<S~Am;o0h_~l%}OJEwyQBOiOE8I@8jdmcg`)re!iMvuRmO%W7IS)3TeE!?c{H z8E2rd2YnvT0RJt7=*`)2f?R!?c>F)iSNNX?0AiYg#?i>YLWUw1%cNGOe*`O-yTQ zS~Js{o7Td#mZr5bt+i=wOlxaeJJZ^m*1@!nrgbu{vuRyS>uOpz)4H41!?d2J^)juu zX?;xVYg#|k`kOYuw1K7#GHtMFLrfcL+A!0Gn>NC%s zrcE+!vT0LHn`+uL)25p?!?c;E%`$DaX>&}QYuY^1=9{*_w1uWEGHtPGOH5m8+A`CY zo3_HVm8PvSZMA7@Oj~Q(I@8vhw!yTGrfo89vuRsQ+iKc2)3%$o!?c~I?J{k*X?slD zYuY~3_M3LVw1cJ{GVQQwM@&0v+A-6Pn|8vqlct?A?X+oUOgn4ZIn&OYcEPlZrd=}a zvT0XLyK34s)2^F#!?c^G-7@XAX?IM!YuY{2?wj_&w1=iWGVQTxPfUAi+B4IhoA$!A zm!`ck?X_udOnYnEJJa5q_QAA|rhPK)vuR&U`)b-Z)4rSb!?d5K{W9&hX@5)$l+`@{ zGcB-bK}-v3S}@asn-;>fkfwz)EwpK2ObcsTIMc$L7QwWLru}7FB-0|B7R9uvrbRO? 
zx@j>?i)mUc(_)(z$F#Vn#WO9wX$eeAXj&rE{x zR=~7^rWG=+uxUk1D{5LX(~6r`!nBg6l`^fgX=O|+Yg#$e%9~cfw2G!xGOe;{RZOdD zS~b(En^wcLnx@q zX>CkvYg#+g+MCwFw2r2AGOe>|T}NF=nWoJ$ZMJE1Oq*-kJk#czw!pN7rY$mUv1v<8TWZ=e)0UgI!nBp9tuk%3X=_Yd zYuY-~)|JEq+=?Vf4(O?zP4L(?9a_Sm#1rad+7nQ6~Wdtus3(_WeO z+O#*Oy*2HfY41(@VA@C1KAHB}v@fQ8HSL>e-%a~r+E3GdnfBYXKc)rBW}X0=7TB~P zrUf-Em}$XH3t?JF(?XdR+O#mHg*7dlY2i(aU|K}e{xU6+X^~BfVp>$wqL~)mv>2wv zG%c2Cu}zC(T3plOnHJx)1g0f4Es<$|oA!@s|C*NAv?Qh_H7%KG$xTaPT1wMWnU>nL zG^V9BEuCrUP0L_fM$T zt(|G@P3vGtR|?(|VcK+q6EW^);=ZY5h$bVA?>_2AMY4 zv>~PqHEoz_!%Z7u+DOwznKs(AF{X_*ZJcT2O`BlaMAIgjHrcc(rcE_%nrYKbn_=2a z(`K1A+q5~R%{6VFY4c56VA?{{7MZr#v?Zo3HEo$`%S~Hh+Dg+_nYP-rHKwgKZJlZB zP1|7FM$scG7dnrYWfyJ6Z*({7n| z+q65T-8Jo=Y4=TgVA?~|9+~#ov?r!LHSL*c&rN$_+Dp@3nfBVWH>SNc?VV}wP5WTl zN7Fu;_Sv*ArhPT-n`z%o`(fHo(|(!u+q6HX1>JhH7%HF!A%QcT1eAE znHJi#Fs6kyEu3lLO^aY!MAQBqMJf`I}EuU%mO)FqpLDLGER@k&6rWG}1CUL(>|W*4VTrrZqLKnQ6^UYhhYT(^{F<+O#&NwKc7sY3)txU|L7hI+@nl zv@WJ~HLaUz-A(IZT2Iq@nbzC1KBo0Gt)FTAO&eg^K+^`9HrTWwrVTZ1m}$dJ8)4c= z(?*##+O#pIjWunYY2!_sVA@2}CYd(bv?-=dHEo({(@mRU+Dy}CnKs+BIi}4uZJufK zO zHEo+|+fCbH+D_AUnYP=sJ*Mq7ZJ%lTO*>%PLDLSIcG$EdrX4lym}$pNJ7L;M(@vRo z+O#vKoi**8Y3EJ5VA@5~E}3@Ov@51vHSL;d*G;=&+D+4LnReT>JEq+=?Vf4(O?zP4 zL(?9a_Sm#1rad+7nQ6~Wdtus3(_WeO+O#*Oy*2HfY41(@VA@C1KAHB}v@fQ8HSL>e z-%a~r+E3GdnfBYXKc)rBVV?h)7TB~PrUf-Em}$XH3t?JF(?XdR+O#mHg*7dlY2i(a zU|K}e{xU6+X^~BfVp>$wqL~)mv>2wvG%c2Cu}zC(T3plOnHJx)1g0f4Es<$|oA!@s z|C*NAv?Qh_H7%KG$xTaPT1wMWnU>nLG^V9BEuCrUP0L_fM$Tt(|G@P3vGtR|? 
z(|VcK+q6EW^);=ZY5h$bVA?>_2AMY4v>~PqHEoz_!%Z7u+DOwznKs(AF{X_*ZJcT2 zO`BlaMAIgjHrcc(rcE_%nrYKbn_=2a(`K1A+q5~R%{6VFY4c56VA?{{7MZr#v?Zo3 zHEo$`%S~Hh+Dg+_nYP-rHKwgKZJlZBP1|7FM$scG7dnrYWfyJ6Z*({7n|+q65T-8Jo=Y4=TgVA?~|9+~#ov?r!LHSL*c z&rN$_+Dp@3nfBVWH>SNc?VV}wP5WTlN7Fu;_Sv*ArhPT-n`z%o`(fHo(|(!u+q6HX z1>JhH7%HF!A%QcT1eAEnHJi#Fs6kyEu3lLO^aY!MAQBqMJf`I} zEuU%mO)FqpLDLGER@k&6rWG}1CUL(>|W*4VTrrZqLKnQ6^UYhhYT z(^{F<+O#&NwKc7sY3)txU|L7hI+@nlv@WJ~HLaUz-A(IZT2Iq@nbzC1KBo0Gt)FTA zO&eg^K+^`9HrTWwrVTZ1m}$dJ8)4c=(?*##+O#pIjWunYY2!_sVA@2}CYd(bv?-=d zHEo({(@mRU+Dy}CnKs+BIi}4uZJufKOHEo+|+fCbH+D_AUnYP=sJ*Mq7ZJ%lTO*`

}VannwicG9#{rkytJjA>_0J7?N?(=M2H(X>mZT{i8C zX;)3VX4-YrZkTq{v|Fa#HtmjScTKxz+I`a=nD)@LN2WbC?TKkmO?zhAbJJd!_R_Rh zroA@pjcIR9duQ5v(>|E?(X>ydeKzfjXy_3ujt*(;}D_(X_uzi)314)1sIb)wF1)MK>*m zX)#TUWm;_0;+PiKw0Ne)H!Xo_2~A66+TW)AW7@x_B{nUIX-Q2>W?FL7Qka&~v{a_0 zHZ6^5X-!LKT6)tmn3mDBOr~WvEsJScP0MClcGGg0meaIcrsXy*k7;>L%V%1C(+ZeY z(6mCP6*jGiX+=#dW?FI6N|;vCv{I&(Hm!_lWlbw*T6xndm{!rWN~Tpdt%_+?O{->F zb<=8?R@1avrqwpBj%jsGt7lq$(;Ar8(6mOTH8!n@X-!RQW?FO8TA0?-v{t6IHm!|m zZB1)uT6@zvnAXv>PNsD>t&3@0P3vY_chh>9*3-0Jru8t|Yj(*~F}(6m9O z4K{6vX+upLX4-JmMwm9zv{9ywHf@Y)V@(@p+IZ6@m^RV0Nv2IUZHj4AO`B%gbkk;- zHq*3Orp-2Oj%jmEn`hd5(-xSv(6mLSEjDe5X-iF8X4-PoR+zTZv{k09Hf@b*YfW2c z+IrJAn6}ZhO{Q%&ZHsAJP1|PLcGGs4w$rp-rtLOuk7;{N+h^K-(+-$+(6mFQ9X9QV zX-7>vX4-MnPMCJmv{R;?HtmdQXH7e2+IiD1n0C>$OQu~m?TTqvO}l2=b<=K`cGI+5 zrrkE}j%jyIyJy;c(;k@i(6mRUJvQx$X-`diX4-SpUYPdMv{$CRHtmgRZ%un=+I!PJ znD)`MPo{k~?Tcw&P5Wlrchi2D_S3Xqru{bUk74VNDBXT6ohUm=@8rzf6l{T4d9rm=@KvXr@IsErw|^O^an(Y}4YH7T2_R zro}fcfoTa%OJv&Lru}2uzosQNEs1GKO-p84a??_nmeRCTrlmG5jcI93OJ`bo(=wQr z(X>paWi~B~X<1FnW?FXBa+sFWv|Og;HZ6~7c}>e_T7J_Cm{!oVLZ%fqt%zwwO)F+v zannkeR?@Umrj<6WjA>;}D`#4H(<+!&(X>jYRW_}PX;n?DW?FUAYM55jv|6UsHm#0n zbxo^hT7A1UabJJRw*3z_ArnNS$jcIL7YiC+}(>j>e(X>vc zbvCVwX&{8)w>h(gXO*U9p4(X>sbZ8mL- zX}VannwicG9#{ zrkytJjA>_0J7?N?(=M2H(X>mZT{i8CX;)3VX4-YrZkTq{v|Fa#HtmjScTKxz+I`a= znD)@LN2WbC?TKkmO?zhAbJJd!_R_RhroA@pjcIR9duQ5v(>|E?(X>ydeKzfjXy_3ujt* z(;}D_(X_uzi)314)1sIb)wF1)MK>*mX)#TUWm;_0;+PiKw0Ne)H!Xo_2~A66+TW)A zW7@x_B{nUIX-Q2>W?FL7Qka&~v{a_0HZ6^5X-!LKT6)tmn3mDBOr~WvEsJScP0MCl zcGGg0meaIcrsXy*k7;>L%V%1C(+ZeY(6mCP6*jGiX+=#dW?FI6N|;vCv{I&(Hm!_l zWlbw*T6xndm{!rWN~Tpdt%_+?O{->Fb<=8?R@1avrqwpBj%jsGt7lq$(;Ar8(6mOT zH8!n@X-!RQW?FO8TA0?-v{t6IHm!|mZB1)uT6@zvnAXv>PNsD>t&3@0P3vY_chh>9 z*3-0Jru8t|Yj(*~F}(6m9O4K{6vX+upLX4-JmMwm9zv{9ywHf@Y)V@(@p z+IZ6@m^RV0Nv2IUZHj4AO`B%gbkk;-Hq*3Orp-2Oj%jmEn`hd5(-xSv(6mLSEjDe5 zX-iF8X4-PoR+zTZv{k09Hf@b*YfW2c+IrJAn6}ZhO{Q%&ZHsAJP1|PLcGGs4w$rp- zrtLOuk7;{N+h^K-(+-$+(6mFQ9X9QVX-7>vX4-MnPMCJmv{R;?HtmdQXH7e2+IiD1 
zn0C>$OQu~m?TTqvO}l2=b<=K`cGI+5rrkE}j%jyIyJy;c(;k@i(6mRUJvQx$X-`di zX4-SpUYPdMv{$CRHtmgRZ%un=+I!PJnD)`MPo{k~?Tcw&P5Wlrchi2D_S3Xqru{bU zk74VNDBXT6ohUm=@8rzf6l{T4d9r zm=@KvXr@IsErw|^O^an(Y}4YH7T2_Rro}fcfoTa%OJv&Lru}2uzosQNEs1GKO-p84 za??_nmeRCTrlmG5jcI93OJ`bo(=wQr(X>paWi~B~X<1FnW?FXBa+sFWv|Og;HZ6~7 zc}>e_T7J_Cm{!oVLZ%fqt%zwwO)F+vannkeR?@Umrj<6WjA>;}D`#4H(<+!&(X>jY zRW_}PX;n?DW?FUAYM55jv|6UsHm#0nbxo^hT7A1UabJJRw z*3z_ArnNS$jcIL7YiC+}(>j>e(X>vcbvCVwX&{8)w>h(gXO*UQ)3%wm-LxI1?KEwdX}e9^W7=NR z_L;Wdv;(FcH0_XShfO=SS<}v$cHXoLrd>4c(to$W zfdU1}n6~|8*;iy=m3>Y2b=fy$-;{kz_HEgBWZ#v2PxgJ;4`e@-{Yds>*-vCYmHkZi zbJ;Iszm)w-_G{U1WWSaDPWF4*A7p=&{Ymy`*?uvNOrfEIW(rtg^Go&MrHL?3}W5$<8f1kL&dP!yMgS6vKz^6EW3&9rm~yKZZ5lp?3S`y$!;yXjqJ9v+sSS( zyMyeGvOCG{EW3;BuClwy?k>BB?4GiF$?h$?kL$(}8Hj_kRz z=gFQgdx7kQvKPr-EPILUrLvdFUM_ou?3J=t$zCmcjqJ6u*U4TldxPwavNy@zEPIRW zt+Kbt-Y$EG?47cA$=)q{kLud=_%{x186?4PoK z$^I?-kL*DC<^ErGVA(-r2bCR6c5vAtWQUX;N_J@3VPuDu9Zq(5*%4$%l>L|NNU|f# zjv_m%>}ay1%Z?#CrtDasJFe__vg6B6AUmP#M6&;u{g3Q_Wha)MM0Qfy$z&&& zokDg>*{NiwmYqg+TG{Djr_C?v3WM7tjMfO$M*JNLpeM9z5*|%ihmVHO|UD@|!-=3d;$_^zvwCpgl!^#dPJG|@&vLnj= zOLipLk!44b9aVNT+0kXkkR4NYEZMPT$B`XZc0AefWhaoGP`byV%g!P@tL$vDv&+sQJE!bivUAJM zBRj9`e6sV)E+D(0>_V~&%Pu0jsO)00i_0z{yQJ(=vP;V@BfG5Za`JmL z%dR53s_bgAtIMt-yQb_~vTMt(BfGBbda~=wZXmm%>_)O1%WfjOsqAL5o6Bw?yQS<_ zvRlh;BfG8acCy>c?jXCP>`t;f%kCn(tL$#FyUXq&yQl14vU|(!BfGEcezN<^9w2+5 z>_M^z%N``AgG%bp^8s_bd9r^}up zd#3DJvS-VlBYUpwd9vrrULbp+>_xH{%U&XTsqAI4m&;xud!_7EvRBJqBYUmvb+XsX z-XMFU>`k&a%ibb;tL$yEx69rkd#CJOvUkhgBYUsxeX{q_f5-%RVCesO)31 zkIOzG`=snsvQNuCBm1oEbF$CNz99Re>`SsQ%f2G}s_bjBugkt6`=;z$vTw`2Bm1uG zd$RA#ejxjy>_@U6%YGvJsqAO6pUZwB`=#txvR})7Bm1rFce3Bh{vi9K>`$^k%l;z! 
ztL$&Gzsvq1`={(*vVY6|BRfz*x&N0PSauNEL1hP%9b9$@*&$_zk{w!h7};TEhm##% zb_CfGW&b5RlI+N`qsWdbJDTk1vSY}ODLa1Ah-ol$ls*_mZ$k)2g`Hrd%_=a8LK zb}reuW#^HdS9U(x`DGW7T~Kx**@a~nkzG`FG13WN(+fL-tPDyJYW{y+`(5+52Sg zmwiC?LD`37AC`SY_EFi#WFMD(LiS17r(~a&eMa_K+2>@RmwiF@McJ2RUzUAE_Ep)} zWM7wkL-tMCw`AXzeMk0P+4p4Mm;FHYL)njHKbHMO_EXu zCfS)~XOW#%b~f4BW#^EcQ+6)dxn<{(omX}~+4*G`kX=xAA=!mx7m;05b}`w-WtWg$ zQg*5T-RUy5FD<)_?6R`U$u2Lug6xX2E6J`byNc|pva89iF1v>8nzC!jt}VNc?7Fh+ z$*wQEf$WB|8_8}gyNT?kvYW|nF1v;7ma<#PZY{fw?6$Jo$!;&ZgY1s7JIU@WyNm3u zvb)LdF1v^9p0az%?k&5I?7p)5$?h+Efb4;?2gx2Rdx-3zvWLkYE_;ORk+Mh09xZ!} z?6I=P$sR9zg6xU1C&`{Hdy4F-vZu+OE_;UTnX+feo-KQh?76b%$(}EJf$W8{7s*~M zdx`9&vX{wTE_;RSm9kgKUM+i#?6tDj$zCsegY1p6H_6^CdyDL?vbV|JE_;XUow9ey z-Yt8N?7g!0$=)yffb4^^56M0(`-tqLvX99=F8hS+ld@0AJ}vu$-Xc9f$WE}AIW|!`-$wQvY*L*F8hV- zm$F~Uel7cr?6_CP8&;9>@ z?lJyLc3{~-WCxWUOm=YDA!LV?9ZGg+*`1aB%Z?&Ds_ba8 zqsxvVJErVdvSZ7RBRj6_c(UWmP9Qs>>_oEvmi>?He`P0@okVt0*~w%lmz_d(O4+Go zrYc3Ro#WT%&%L3T#jnPg{{okez5+1X@gmz_g)PT9F+=a!vEc3#=}WapP%Kz2dd zg=80&T|{g++1+G!m)%2lPuaa>_m*~?@vm%T#vO4+Mqua><=_FCENWUrUKLH0)3n`Cd6y+!s` z+1q4qm%T&wPT9L;@0Pts_FmchWbc=KK=wh|hh!g?eMI(A*~erbmwiI^N!h1lpO$?_ z_F38IWS^IPLH0%2mt>#p($_^$wxa<(JL&^>%JGAUDvct*_Cp*0C2(lx}{!4Zw*^y;O zksVcbG}+N*$B-RUb}ZSkWyg^nS9Uzv@nt8Foltfn*?-IaNA|z66U$B_JE`nsvXjeB zAv>k)RI*dcP9r<5>~ylz%g!J>qwGwwGt15*JFDz$va`$1Av>q+T(Wb^&Lcao?0mBG z%Pt_hpzK1j3(GDdyQu79vWv?uA-kmPQnE|SE+f0F>~gZp%dQ~1qU=htE6c7TyQ=JJ zva8FkA-ksRTC!`)t|Pmy?0T~6%WfdMq3lMo8_RAYyQ%DEvYX3pA-kpQR&` z>~^x-%kCh%qwG$yJIn4OyQ}POvb)RfA-kvSUb1`3?jyUe?0&NQ%N`(mpzJ}i2g@EJ zd#LPTvWLqaA$z3kQL;zN9wU3K>~XTk%bp;6qU=esC(E89d#dbdvZu?QA$z9mS+Zx# zo+Ep%?0K^1%U&RRq3lJn7t3BEd#UVYvX{$VA$z6lRkByhUL$+0>~*r&%ibV+qwGzx zH_P54d#mhivbW3LA$zCnU9xw}-XnXj?0vHL%RV6cpzK4k56eCx`>5<=vX9F?A^W84 zQ?gIXJ|p|A>~pfu%f2A{qU=kuFU!6n`>O0~vaid&A^WE6Te5GE__vY*R-A^WB5SF&Hrek1#>?02%?%l;tyqwG(zKg<3i`>X74vcJpz zA^WH7U$TG8{v$h3QMvz@9awe{*+FFolO0@k2-zWJhmsvyb{N@VWrvd;UUmf85oP}+ zJCf|kvZKh3Dm$9&=(1zTjww5q?AWs7$c`&Jp6vLt6Ua^|JCW?aW&b1lU)hOeCy||0 
zb~4$?Wv7sxQg$lYsb!~;omO@_+396xkeyL>CfS)~XOW#%b~f4BW#^EcQ+6)dxn<{( zomX}~+4*G`kX=xAA=!mx7m;05b}`w-WtWg$Qg$iXrDd0qT~>BE+2v(dkX=!BCE1l_ zSCL&+b~V}6W!I2hQ+6%cwPn|lT~~HJ+4W^Nklj#rBiW5*H<8^`b~D+{Ww(&sQg$oZ zt!1~7-Bxxx+3jU_klj&sC)u54cahyyb~oAGW%u~c{y(2^|Ih#8_LM8VWcQZcM|NM? z{bcu-JwWzA*@I*cmOVuFP}##|50^bc_DI>IWRI3TM)p|Q<7AJQJwf(F*^^{XmOVxG zRN2#HPnSJI_DtEcWY3m8NA_IV^JLGLy+HOt*^6W^mc2ywQrXL7FPFVS_Db2SWUrRJ zM)q3S>twH&y+QUy*_&i*@t8w zmVHF_QQ602AD4YX_DR{NWS^FOM)q0R=VYIkeL?m`*_UKrmVHI`RoT~MUzdGD_D$Kh zWZ#y3NA_LW_hjFf{Xq6Z*^gvDmiVUPl% zAe{nYeE)ZhwZ3)y_ReF>ooDZv*BJLqOedkAq?6Lg=;U+?IwhToPEDtw)6(ha^z>77 z20A01iOx)Cp`WI+(%I(ce; z`g8-jA>D{>OgEvM(#`1RbPKvA-HL8ax1rn8?dbM&2f8EOiSA5yp}W%E= z?oGc;_o4gJuh9MI{`3HPAU%j4Ob?-l(!=Oi>EZMU`Zana{W?909!-y-$I|2I@$>|G zBK-zEiJnYPp{LSs(r?kz=;`#^^bC3?J&T@A&!Oki^XU2X0{R_#A-#xxmwu0apZ*)3L26`jCiT<4ag5FGTp|{f8 z=r8H7=&$K-=x^!m^bUF_{T=;1y^G#W@1gh7`{@1j0s0_)h(1jJK>tYpME^`5p^wtX z=;QPW`Xv1eeTqI!pP_%H&(goqztiXF^YkC|1^Oa=iM~uURIx(Guev(c~ zC!>?oDd?1RDmpcthE7YTqtnw*(HZEBbS648orQjy&Pr#av(q`~XXt0?=jfbtE;=`z zht5mqqo1eq(*@{)bRoJhU4$-57o%UGi_<0Ol5{D$G+l;%kuFP@qs!A3=!$eDx-wmb zu1dc|SEH-bHRzgjExI;chptQ4qwCWR=!SG7x-s2^Zb~>cm(wfgmGmllHNA%ZnEr%bOMgm# zMz5pS(;MiG^d|aq`U`q9y@lRNZ==7YzoNgUzoEaSx6?c5o%DC~_w+7$H@%16OYfui z(+B8-^db5%{R90Y{S*B&eS|(rAES@cC+L&(FZ3z;G<}Btl|D=VM*mKqqtDZS&==^7 z^dZ?PN$$#(y8dwbQ(G>osLdVKSgJtGt!yp%ybs| zX*w&Njm}Qzpr4_irJtj7(z)o|bRIe{osWK=&QBMh3(|$?!gLY3C|!(xfi6y$pi9!F z=+bl<`bD}dU5+kKSD-7>mFUWJ6}l?@5?zh1PS>Do(zWQ?bRD`bU5~C$H=rBRjp)X7 z6S^tgjBZZ1pj*LE8UImPWPaD(!J>3^viS~x-b0- z-H+~151wpMHQ&NGGBnq#vRmrXQgnr5~dor=Oq`(@E$j>7;ZrIys$!PD!Vt zQ`2eav~)T;J^d7&fzC*0qBGN3=%?wdbT&FWor8XcewKcY&PnH@bJKa~ymUVLc{)E` zfG$WEq6^bS=%REn`USc;U4kx2m!eD4W#|{_vUEARJY9jVNLQjO(^crI^hin-==5KGwE6MYmj0Cfj9y2t zr#H|W=}q+K^cVDIdJDak-bQ~(e?@;ye?xyuZ>M+AJL&J}@9ACiZh8;Bm)=M3rw`Bv z=|l8k`Um<)`X~Bl`UriLK1Ls>PtYgnU+7cxY5ENPD}9#!jsBfJN1vzvpfAuD=}Yuw z`U-uOzD8fCZ_qdCTl8)EPx=mhm%c~;Mc=3YrvIV;rT?Q7)MNqB56}teMD&C7L-fP+ zBlM&6WAx+n6Lexa3H>CUlukw`r&G`==~Q%TIt`tcPDiJwpQ1C+8R<-PW;zS~G@X^s 
zMrWsU(9h7%($CR3>0ESfIuD(f&PP8_=cfzM1?fU`VY&!ilrBcUKo_S=&?V_obZNQ_ z{UTkKE=QNAE6^3`N_1tq3SE_ciLORhr)$tP=~{Gcx(;2Ju1D9W8_*5uMs#Dk3Eh-# zMmMKh&@Jg!bZfc|-Ii`gx2HSM9qCSVXSxgBmF`A&r+d&n>0Wei`enKg-Isoa?nn2h z2haoQLG)mH2tAY@M!!lAr$^AQ(Ie^C=~47(dJH|59!HO-C(sk=H|R<9WO@ocm41_c zi=IYLr{AV$&@<^-^lW+#J(r$G&!-pA@6ZeBMfAJ$d-VJC2lR*ZNAzNP3B8nFMlYvV z&@1Ux^lEwy{W1Lsy_Wuz{)}EnuctTA8|h8-=kyo!W_k;~mEJ~wNq0jtm^lADG z{VRQz{*C^fK1ZLY|DZ3>7wJp%W%>$zmA*z_r*F_V>09(|`cL`}eV4vR|3%-Y|EB+; z|E2$<6V&4VPd`8>q!ZB((ht!O(~r=P(vQ)P(@)Te=_K@%bW%DQot#cVr=(NSsp&Ly zS~?w_o_>nXKxd>g(V6Ki^wV@!Ivbsx&OtvzKTAJH=cIGdx#>J~UOFHBJe{8|Ko_J7 z(S_+EbWyq({Q_N_E_ z(3|Nk^j3Ns{U!Ys{Wbj!{Vlzn-a+rAzoWmWchS4)J@j6BAHAPGKp&(J(TC|D=pX5y z=%48$^ild4eVjf)pQL}GPtm98GxV?YS^78nclsQCp8kWrKwqRU(U<8f^i}#AeVx8R z-=uHRx9LCWJM>-p9{m@6pZ=TvhyIuTk4{jV`#=2vosdpMKS)1BKTJPDKT1DFKTbbE zC#I9oPtr;0WOQ;m1)Y*kMW?3I&}r#(bb9(JIs=`N&O~RXv(Qh|S?O$ab~*?B4E-$q z9G#QSMdzmT(0S>6^z(Fnx&U2}E<_imi_k^sV)P4iak>Ouk}gG;rpwST(q-v#ba}c0 zU6HOtSEj4bRq2=LYIJqF23?b`Mc1b5&~@p0bbY!3-H>iXH>R7=P3dNIbGilHl5Rz} zrrXeM>2`E`x&z&j?nHN{yU<3Q^gdI9|oy^vl+ze~SIzfXTae@K5sFQ%8!OX+3wa(V^5l3qoxrq|FP z)1T05=}+m;=ymjZdIP3#Hm`T%{9K13g;f1rP)f1-b;kI+ZyWAt(Q1bveJg+4`}rq9s7(r4-4=-=sc z^m+OZ`T~8CzC>T9uh3WNYxH&c27QyhMc=0Xr0>vo>3j5F^nLno`XBmV`ae2B9q#}1 z19U<<5&amGi82vc?1f7^pLO)3-rIXRg=@fKIIu)IoPD7`q)6wbar|1lH zMmiInna)B#O=qRE(b?%7^fUCc^mBAhIv1Ur&O_&=^U=@K`RM|5LAnrKm@YyWrHj!o z(8cKzbV<4tU79XKzety*%hBcO3Uo!f5?z_DLRY0|JVQD7w$DUsY%zOYtwb; zx^z9dKHY$BNH?Mz(@p56bThg+-GXjOx1w9qZRoaiJGwpHf$m6mqC3-F=&p1(x;x#2 z?n(Eed($t|edxaQD|A1)KRtjRNDrb1(?jT?^sxWii4r78P@q8m{C!^y8Xhzv=(V7c zL9Yjm3K|_WCTMKXxS;Vt6M`lNy%97iXmZe$ps7J`2E7$DEoges+d(seW(Lg)njJJJ zXl~HFp!q=yg5C*Q7_=zp-Jth^-Vgd9=)<6of))oY30fMoENFSqilCK2tAbVstqJ-# z=#!weL7xVF7PKyCeb9!WjX|4&J`egLXmik(pshjMg1!v;D(LH=Z-TxJ+8(qcXlKxO zLEi`M3fdjCCuncbzM%a<2Z9a;9SS-e^h3~(K|clk9CRe;Xwb2s<3T5aP6quFbSmg{ z(3zlLgU$y17W8}2xuEkwe*|3!x)^jR=yK4NpsPXGg02VM2)Y?`E9iF6pFwwm?grfp z`YY&u(BDD-1pOQIUr>U&{%`)Dpa+5y1|)6_g<;V^F4`%t2X#o({?ylr1QGP>!Hyf}Ra} 
zE+}VEuAtmOd4lo=XN(Pk*DjifN=*6J2 zLFIzV2UQ5F7*r{!a!{3^szEOWRST*fR3oTnP_3ZaL3M)a2Gt9yAJibIVNj!>#z9Sj zng%rsY97=gsAW*Apw>Zcg4zbP3u+(KA*f?er=ZS3U4ps>bqneq)FY^8P_LlgK`#gO z3F;g4N>IO`{y_tR1_liZ8XPnvXlT%|pjU&22aO1NEofxW>p`P}MhA@v8XGh&XnfFw zpou|m1WgK>95f|pYS5cOZv{;YnjZ9a(2Ss&L9>Ep2h9nZ8#FIye$axTcY+oMEed)! z=)Iu#gFXoQFzBP8#X(DgmIf^gS{}3_Xl2l|9? z(59fzgT4sb9JD29YtXi!FN3}c`a0;Fpl^e=2ki*j8T4Jy_d&aYb_eYV+8eYlXn)Xw zpo2k&f({4$5cFfvPeDHi9SJ%bbS&t2(21avLB9l@3OXHhCg|6ovq8TF{T_5K=zP#0 zK^KB923-oe9CRhp?eyZU)^7x*haq(4C;WLHC0G3c4TkchEmU{|5aRl%QVN ze?bofB@9Xw^kC3KK@SH#67*=$V?mDxJrR^RC`r(hK}my>1tkwk5tK40RZ!}nG(l;D z(gmdtdMYSGP{yE4L79WH1U(&;H7Hw9_MjX=&jdXi^juKRpj<(@gYpFB4ayhvd{F+N z0zn0X3I!DoDiTyQs94YoLB)eg1eFXb6;wK?Owfx#WrNBEl@F>AR57ShQ01U1K~;lZ z3aS=VJ*Y-d&7fLAwS(#e)eWi_R6nReP{W``L5+i&1T_t67SuebMNrG2RzaJ!vA=#`*;LH&aU1Pu%t6f`(! zNYK!rVL`734G$U-^jgr!px1*&1&t0G6ErqxT+sNS2|*Ks-Uyl$G&yKW(A1zegWd|7 z7BoHR?VuS!GlOOY%?_FqG&g8o(EOkULGJ`D3|bWQZqR!{?+1Ml^kL9PL5qWy1T76( z7PLHQMbOHiRY9wR)&zYV^hwa#pihH73tAVnK4?SG#-L3>p9g&rv^i)?(AJ=BL0<-a z74&t`H$mS9Z4cTJv@__tpznir1?>*n6SOyIU(o)b13?Fa4h0XP{2uc`~DCohUhk_msdL-!4 zpvQt94|*afaZr+=Cxem(B@0R(lp-i)P^zHRL1}{02Biy1AM{jEhMZ0JL9K#X2ek=m8`LhSeNcy>jzOJ*ItO(L>KfE7 zsC!V4pq@d!f_ewN9MmVMZ_q12{et=j4G0<-G$?3r(2$^^LBoPx4H_OaBIvcCkwLEq zjS3naG$v?l(72%SK@)-|2E7q9DQI%gl%T0WZw9>;G%aX)(Az;Xf@TKI3Yr}>CunZa zyrB6(3xeJWS{Sq_=-r_Ag5D4MAn3!OkAfBlEeTp0v@B?O(2AgyL92pR2dxSEIOvn0 zwLzZ-eHOGXXnoLzpp8MBf<6!WB4~5amY}Uc+k(Cf`YPz_pl^b{4cZ>GBWP#PcR}9= z?F!l*v?pk9(7vGkK?i~k1|14I9P~rbk3l~L{Ty^8=xETfpyNR&f=&kg5_Br)bkLcg zUxUsD{TB3l(7B-VL4O2Y2)Y<_Dd=+0m7uFZ*MhDG-3YoFbSvm~(4Rqfg6;<03;HYQ ze$d}R{{;OT^j}bd24VjNJrI;IC{fUZK@SBz9P~)gqd|`aJs$K#P~xB@K~DxH4N4Z2 zJSas_%Aiz1se{r4r433Kls@RGpbSA7gE9qW4$2brbWqlyY(d$Bas)jS^lZ>`K{rCPLA8Qv2h|Cx8&of-eo%v;hCz*j z8V5BAY8uomsCiI}pq4?cf?5Z)32GbEE~tG_hoFu@oq{?CbqVSk)GercP>-OVLA`={ z2fZBBC#Y}GD?$B&`UedN8W=PvXmHSwprJv-f?f?89yB88wV;tfuLq3^8XYtyXl&59 zpz%Qyf+hyN5i}`ia?q5ZsX=cBy%jVqXnN4wK{JA82F(hZ9W*CsZqU4-`9TYU-U(V5 
zv?%D^p!b5_5BebJ!=R6X76&Z}S{k%0XnD|zpp`+Zf>sBu3Hmtblc2Rhp9Xyvv@U3U z(1xImL7Rd;5Bef#bI_KctwGy@z6|;*=wKL!09bR_6#(6ON7K_`Mv2K^FrD(H03nV?^T&IbJ!^n1{` zpz}e01YHQa7<4JW?uvWjFL7V| zE&dVzivL7{hWxhwBOVY5MI!N_ct|`f9ubd<$He2}36WSN5l@PwBAG}oQizlyl}Ihp zh_oV|NH3le8AL{rNn{pT#M2_H$R@Ij9O4=Atawi36uCrhkw@ee`NZ=gzbGIIibA5W zC?blAV&Vl+T$B(cMJZ8Qlo2nAvZ9iF;EN=gT)XrR16cZis52}cukBHuZvM)v=}4Cig9ARm>?#KH^d|{ zSxgaA#hczmCP%IMfiuc6(;sf!a_(&`kOT<#K zOe_~G#7ePBtQKp;$Kn&QR(vWx6YIo!u|aGUo5bhh3$a;j5nIJJ@um1md@a5a--_*G zhuA5;6W@znVz<~M_KJOCzc?TcibLYC_(A+AeiA>6BjTtyCXR~};-vURoD!$S8S$$) zD}EEdi*w?<_(NO}7sVxUSzHlU#WitV+z>a#Epc1?Dej27;-2_R+!ud~f5gAyKarr3 z`$s$=5{g9PLGh4ySUe&g6_1I>#S`^s4i-VnxdAdE$WE6qMoQP8iyi5Vy*a8d?wb3^ zN9+~*#C~x=92AGdVey0bQT!x+7DvQUaZDT+C&Wqdi#R1ti!xA> z$S!h-XT-DOIgwN361hblkyqps&x`z`fG8*miNd0YC@PAH7esMULX;GxL}^h*yeP_w za-zJbAS#MVqOzzWs*0CHHBnvE5H&?DQCrjzbwxcb712-h7X!pVF-Qy+L&Q)q zOuQ-mNW3fF6Yq-;#E0S|u~;k-OT{v=T&xf)#VWB{tPvlJPsCdBsrXE+ z6YIqWu~BRipNlWVX0b(V72Cv@;w$mB_(psywu>ENr}$2MFLsIDVvpD>_KE%CfH)`) ziNoRt@uT=j{49=$qvDu2E>4J(;umpBoEB%qui~uuP5ds-iSyzQaY0-Zm&9dpMO+ov z#C35)+!VLOZSkkLBkqcO;xBPu{4M?w|BC-af+p@C@qkDu5{U=JL*ilahyCLR}0 zh{Pg^cv2)4$wYFILZlR_L~4;nq!sBzdhyi%@V}=x|L@0-89d7Y|3IDQbz@qK>F5>WTWIfoLciiN>OdXeyeC=AwmYDO!ov zqK#-P+KKj}gXkzaiO!;n=qkF2?xKh2DSCd?7ZAEn=(KCcYG3iLb>s;#;v@><~M}cj9}oOY9bV#9pya>=y^bL2*bN z7C(p|#ZTgAaYP&y$HZ}QLYx%8h*RRUI3s=)XT@*gcX3Xf7k`Kg;-a`DE{iMTs<dDBB@9wl8Y1~rAQ@Gi!>swNGH;Zr$h#kQDhRCMHcb2$SSgl>>`JFMm#H?6FEgL zkz3>uc||_)yvQ#Kh=QV!C@hMIqN131K@=AyL`hLflon;gi=wP3C(4ToqN1oIDvK(j zs(49M6V*ixQB%|swM89KSJV^rMFY`LG!l(P6VX&O6U{{n(NeS$twkHrRh_Pav7%wJ>iQ)}0NlX?~#8mO7cuPzZ)5Y6jhL|a4iP>U~m@DRq`C@^1M=TVJ z#Jl1>@xJ&#d?-E=i^USLR4fzA#R{=ftP-om8u79CM64B`iqFJ4v0iKt8^tE^x%fhC z7F)ztu}yp_z7k)HZ^XA^yVxOiitohtVwc!0_K3Y=pV%)Bh=bygI4ph;KZ>8k&*F$U zDvpWc;)FOUei5g{X>msUD$a`E#P8yqI4}MX7sN$zNn93J#8q)kTo*UQO>s-y7JrI6 z;;y(S{u1}a-{K$fulP?SXy*PA4~T>!k$6x%Bpw!zh)2a^;&JhWNGy_wCq+_`Oe7a6 zL`soLq!wvJT9Hnq7f*={BBRJ8GK(zYX^~ZA6WK)$@r-y@JSTFBTq3u~Bl3!T;(3u@ 
z6c7bPAyHTq5k*BY@q#EWN{Eu8lqfCAh!;g!QBIT>6+}f*NmLe9L{;&Us3xk58ltAC zC2EU0qOPbX>Wc=Vp=cx;izcF}XeOGA7NVtSC0dI%qOE8r+KUdNqv#|$i!P$8=q9?0 z9-^n{C3=gOMIX^uydwIE{$hX_CCPs?a#V9daj1gnSI5A#K z5EI24Vv?9FriiKHP4SkPCZ>zG#SAf1%o4N395GkS6Z6Fa@s3z17KwMod*XfZf%s5- zBo>P$VyRdrmWvf)rC23ai#6h7@rhU~J{6ydbz;5PAU29k;&btZ*ete)tzw(_QhX)8 z7T<_(#dfhn>=fUL@5L^$TkH{g#XhlL91sV^A#qszAbu1-iJ!$0aa0@=$HfV8Qv4!L ziPPeY_*I-0zlq<)IdNY6AufoE;*z*5u86DRnz$}*h@0Y;xGnw^cf?(BPy8kBi@(J` z;$QKfNYLEiF;EN=gT)XrR16cZis52}cukBHuZvM)v=}4Cig9ARm>?#KH^d|{SxgaA z#hczmCP%IMfiuc6(;sf!a_(&`kOT<#KOe_~G z#7ePBtQKp;$Kn&QR(vWx6YIo!u|aGUo5bhh3$a;j5nIJJ@um1md@a5a--_*GhuA5; z6W@znVz<~M_KJOCzc?TcibLYC_(A+AeiA>6BjTtyCXR~};-vURoD!$S8S$$)D}EEd zi*w?<_(NO}7sVxUSzHlU#WitV+z>a#Epc1?Dej27;-2_R+!ud~f5gAyKarq?`$s$= z68;aoWJ%=NgW@6auy{l~DjpM$izh^4kwiQxl8R&^xkw>Wic})CNF&mUbRxZYN@Nfj zMJAD1WD!q`tRkDpE^>%x#Ixc#kyGRnxkVn4SL74Vi~OR1C@2bv!lH;MDvF60L~&6< zloX{zX;DVJD9Va*qP(aeDvC;?vZx}eikCz+QC-v!HAO8^ThtMCMLkhpG!P9%Bhgqi z5luxi(Ok3;Ek!HQTC@>uMLW@6bPydyC(&7S5nV+$(OvWqJw-3kTf8j#h`!<#(NFXj z1H?cvNDLN3#85Fzyefu^5#lv5QoJrkiP2(=7%Rq!@nV9QDBcj0#AGo=Ocig6x5P9t zUA!%3h?!!Rm@VdrxniD}FBXV*#6q!1yer-l?~4z_hvFl#SS%4s#WJy6tPm^3DzRFu z5g&_B#9HyG_)M%5>%|7KQEU>Qi!a1xu|;eZ+r*dREAh4XMtm!_iydO8_)dH;c8T3$ zkJu~riT&b$I4BN@!{P_=qxebuERKky;+QxtPKcA@7ja6Q7H7n-;;i^h{4UOk^WqP2 zL0lA<#AR_sTou>Eb#X)76t~1}@u#>W?uvWjFL7V|E&dVzivL7{mM$RifJi73i3i0) z;$iWKcvL(l9v4rD#3G4!QY010L~@Zrq!g({YLP~y73oBJ@s!9QGKx$hv&bTz7Fk6$ zkzM2v&xmKmb0Vk6C31^ABCp6No)`H=0Z~vC5`{$(QB)KYFNorzgeWOWiPEBscu|xU z?RqPD0b>WX@zzGxsCibkTbXd;@5W}>-hAzF%7 zqP1uv+KP6fz33o1icX@l=pwp`Zlb&BA$p2lqPKWi^bviq#Dz=F)#aH5M@s0RaY!^GkPVt@iUhERP#U8O&>=XON0dY_q z5{JbP;z#k5_*ongN5wI5T$~Um#V_KNI4#bIU&UGRoA_Ov6X(Sr;)1v+E{V(HinuDS ziRiswX5 zkxS$jc|=~3PdqR3ivpscC?pDtBBH1$CSDN5MF~+-loF*y8S$bhE6R!TqJpR>Dv8RX zil{1H64gX?QA5-ewM1=EN7NPdM19dfG!%_QW6?x370pC*(L%Hotwd|lMzj^}M0?Re zbQGOLXVFD;72QO4(L?kUy+m*EvgjlFidRHG(O(P@1H~XQSPT(E#W3-z7%oPL*ThKi zx)>!!i!oxX7$?Sy31XsnLrfBr#S}4ByeZxi)5LV~wwNJiidkZ|m?P$jd1Ag;Al?xR 
z#Uk;pcu%}9J`f*@kHliJL@X7{#B#AhtQ4!nYOzLqEItuy#i!yku}-WP8^lJjNqjE8 z5Szsou~lpnUy859*Ww%Tt=KMhh@Ij)@x9n3c8fh?uh=K{iv!}II3x~>AHY|3I zDQbz@qK>F5>WTWIfoLciiN>OdXeyeC=AwmYDO!ovqK#-P+KKj}gXkzaiO!;n=qkF2 z?xKh2DSCd?7ZAEn=(KCcYG3 ziLb>s;#;v@><~M}cj9}oOY9bV#9pya>=y^bL2*bN7C(p|#ZTgAaYP&y$HZ}QLYx%8 zh*RRUI3s=)XT@*gcX3Xf7k`Kg;^P0nOO{KXT^3iwRdG#R7dOOBaZB75e~LTeuDB=u z68FX5;vey^_)jEg;{p;7h=d}Mcu+hf9u|*?N5x~}aq)ymERu*PMN*MWBo`?}N|8#W z7HLFUkxrx+Pl*g7qsSyOi!9=4kyT{-e^I4Q!@k)?4)Kh5Ry-$iid-VM$RqNKeBybL zUlb4pMIljG6cI&5G4X;ZE=q`!qLe5t%7_<5Sy4`u7ZpTBQAt!5RYXLc8jB{Psc0seix#4#XeC;UHlnR)C)$e+qNC^}I*Tr%tLP@W ziyoq<=p}lKmqj1ZSG*$niT+}M7$^pb!D5IQDu#(y#c(k~ye3A9*TpC?T8t56#W*ou zOb`>r8)A}}ET)L5;!W|Em?ox+x5W%GQ_K>x#T+qL%oFp)0`ZPmC>Du##e3p?@qzeI zd?Xf&C1R;qCYFm8Vx?FmR*N;_WATYtD?SyUiFIPV*dR8FP2zL$h1e{%h^=Cq_)>f& zz82qzZ^d@8L+ljaiSNZOv0LmBd&NGnUmOqz#UXK6{2+c5KZ&2k5ph%;6UW5~aZ>yu zPKndvjQCZY6~Bq!#W`_a{2?xgi{g^FEUt*F;+nWFZit)Wmbfkc6nDg3aZmgu?u)<0 zKjL5UpGeU5|JgtP_u~A2!~-IsNF*K<4~d7xBjQoBUnbgUBc{iOeF4cv@r?*+h1cLp&p%70-#BBA3W5@`$`5pLky67X?H? 
zQAiXPMMP0iOuQh9ixQ%wC?!gZGU7#1R+JOvMFmk&R1%d%6;V~ZB&vz(qK2p`YKhvS zj;Jf@iTa{}Xeb(q#-fR6Dw>JrqJ?NFT8Y-8jc6;{iT0v{=qNgg&Z3LxD!Pg8qKD`y zdWqiRWzk3U6|abXqQ4j*28uyquoxnSiechaF}p)ABn|c ziC8L@iREI2SSePC)nbkKSbQSZiciI7Vx3qoHi(U4llWYGAvTLGVyoCDz7$`Huf;dw zTd`g25Ie=t{(Ua?Q?7YD>aaY!5%KZqa2PvU2BL>v{z#Bp&#oD{!^Q{uEZ zBYqWU#c$$waZa2Ue~1g>qPQe3i!0))xF)WP8{($8C2os9#T{{1+!KF^`{Hl$kN8*o zCla)C|A+@fLXk*3C>|0Ii$}zx;xX~KctRu=NyL*PsYoW0ixeWINF`E>G$O4?C(?_j zLCw~t75nqAzl+B#p_~}7%j$#v0|JUFD8hI;terLOcqnbRPm;G zOH32f#oJt7*hUThE>#U}B&_(E(JTf|neO?)Z75?_mN#J6I**dca`@5J|F zm)I@#h`nN;*e?!LjCaQ}XqNb=NYKuCeuBa#Kiw2^hXe1hoCZefmCYp;D zqNQjhT8lQKt!O9Oiw>fr=p;IeE~2aGCc29rqNnI3dW)AuAJJF5BKnE`Vt^PZ28qF9 zh!`q{iC4vNF+#j1MvB+PC^1@$5o5(TF z6`zT9V!hZPHi}K+bMb}PEVhWPVw?C=Ap#KCxdM z5C_Gf|Kb06$?|_cTsrL858_AhllWO25l6)_aa^1bC&e$~lsGNUh+oB7@tgQvoD=88 zAL4?zC@zW1;)=K`u8Hg7hPWwiiQD2&aYx)0_rzb~zW7`GBmNcti3A;7K;i+BP$Uu$ ziigC*;t}zvcuYJlo)C#e67i%+Dw2uhB85mPQi;?ejYuofiS*(rkwIh>nM7uhMLaFC zifkgg$RVB)&x+?nPLWIG7I{Qokxx7?@{0nZpeQ5?iz1?^C?;MI#YG8GQj`*RgL39+IL}$@ObQRr1chN)i6um@m@v`V6`ifUXKha+d5Cg>^F<1-{ zL&Y%hsu(Uth}XnO@wyl#MvF0GtQaT8iwR<)ctcDQlf@Jldd@Mc@YsIJH zGqFyr7aPPzu}OR`z7U(m7O_=q6JLt2#Mj~*@vYb{c8HzgJMq2PC3cHFVz1aI_KO4J zpg1HBiyy>~;wSO5I3kXUW8%0tAx?^4#3^xFoDsi@v*I`LyErG#i$BB#aZy|nm&Fxv zRa_I-#SL*&+!D9NpW=?VEAENE#C`F%_(%LJ{u2p0x_`t2BB4km9uyCWhs7h}QSq2~ zTs$EXizMPnkyIoT$wdm0Qlt{8MH-P-q!a1IQzC=NC^Cu6B8zxhWEI&&c9BCoBc2t{ ziJT&r$Sv}Syds}?UgQ@AL_twV6c$B9QBh31Ac~6;qNFG#N{ce$MNw9i6Xit(QBhP9 zl|>a%RlFpsiRz+;s3~fR+M8K7$63UL1M5NB8G}#;#DzRj1aGh zk>YhRN{kj`#8@#-j29EcMDd20Bqoa~Vybvkyd|cI>EdlML(CMj#B4D~%oX#*e6c{h zBNmE9;$88ccwc-VJ`^8`#bSwADwc`mVue^KR*BVOjrdr6BG!se#b;ujST8n+jbfAd zTznxmi!EZS*e1RdUx}~9H{x5dUF;A$#dqR+u}kb0d&FL`PwW>5#6fXL92P%_AH`4N zXK_Rv701MJaYCFFzlc-fv^XPv6=%h7;&*XQoELwH3*w@E{ekx^t4nMD@yw8$#5iR>bWct$)co)bAmE|FX05qU*E z@w~_{3W$QDkSHvQh@zsHctI2wB}7S4N|Y95#EYV=C@0E`3ZkN@Br1z4qN;dFR1?)j z4N+6n617DgQCHLx^+f~GP&5*aMHA6fG!xB53(-=v60JoW(N?q*?L`OCQFIcWMHkUk zbQ9f057ATf61~OCqL1h+UJ?C7e=$G|6obTIF+>a%!^Ep%xELW`6C=gzVw4yy#)z?E 
zoER@Ah>7A2F-c4oQ^Zv9rg%$C6Vt`pVuqM0W{KHij+iUviTPrIct5a#Lwc0I4X{bB#A$Ix{3_0h-^B0YoH#H35EsNnaYLc8jB{Psc0seix#4#XeC;UHlnR)C)$e+qNC^}I*Tr%tLP@Wiyoq<=p}lK zmqj1ZSG*$niT+}M7$^pb!D5IQDu#(y#c(k~ye3A9*TpC?T8t56#W*ouOb`>r8)A}} zET)L5;!W|Em?ox+x5W%GQ_K>x#T+qL%oFp)0`ZPmC>Du##e3p?@qzeId?Xf&C1R;q zCYFm8Vx?FmR*N;_WATYtD?SyUiFIPV*dR8FP2%(aftM^_c(z$=5nIJJ@um1md@a5a z--_*GhuA5;6W@znVz<~M_KJOCzc?TcibLYC_(A+AeiA>6BjTtyCXR~};-vURoD!$S z8S$$)D}EEdi*w?<_(NO}7sVxUSzHlU#WitV+z>a#Epc1?Dej27;-2_R+!ud~f5gAy zKarq|3rIX55{g9PLGh4ySUe&g6_1I>#S`^s4i-VnxdAdE$WE6qMoQP8iyi5Vy*a8d?wb3^N9+~*#C~x=92AGdVey0bQT!x+7DvQUaZDT+C&Wqdi#R1ti!{|P+4fl?1+qP}n zR>$s6r(@f;ZQHhO+qS=*Q|CSJ7~k)i z5tR^?5mgXX5!DdY5j7As5w#Gt5p@uC5%m!D5e*Ox5seVOh{lK}h^B~Uh~|hEh?a;} zh}MWUh_;A!i1r9Sgg>GKA^_155s2u7=#1!s=!)ou=#J=t=!xis=#A)u=!@uw=#LnH z7>F2z7>pQ#7>XE%7>*c$7>O8#7>yW%7>gK(7>}5Mn24B!n2eZ$n2MN&n2wl%n2DH$ zn2nf&n2VT)n2%V1Scq7JSd3VLSc+JNSdLhMSczDLSdCbNSc_PPSdZ9%*ofGK*o@eM z*oxSO*pAqN*ooMM*p1kO*o)YQ*pE1XIEXleIE*-gIEpxiIF2}hIEgrgIE^@iIEy%k zIFGo1xQMufxQw`hxQe)jxQ@7ixQV!hxQ)1jxQn=lxQ}>%c!+p}c#L?0c#3$2c#e31 zc!_w0c#U|2c#C+4c#rsi_=xy~_>B01_=@<3_>TC2_=)(1_>K63@M(?bA4CvDP((09 za6|}1NJJ<^Xhax9SVTBPctiw5L_{P+WJDB1R75mHbVLk9Ohhb1Y(yMHTtqxXd_)37 zLPR1&Vnh-|QbaODazqM5N<=C|YD5}DT0}ZTdPD|9Mnon=W<(Z5Rzx;Lc0>+DPDCz5 zZbTkLUPL}benbI8K|~=$VMGx`QA9CBaYPA3Nkl0`X+#-BSwuNRc|-+7MMNb;WkeN3 zRYWyJbwmwBO++n3ZA2YJT|_-ZeMAF9LqsElFQPG`38E>Y8KOC&1)?RQ6{0nw4Wccg z9ilzL58;pKfCxZzLQ+AeJJQA(kUnAXXw)Ayy;SAl4$*A=V={AT}a4AvPnnAhsg5A+{rS zAa){lA$BA7Aoe2mA@(B1Af6(gA)X^%AYLL~AzmZi zAl@S0A>JcCAU+~KAwDC%Aig5LA-*GiAbui#A$}wNAbi^3`3DgM5fl*&5gZW$5fTv! 
z5gHK&5f%{+5gri%5fKpy5g8E$5fu>)5gic&5fc#$5gQQ)5f>2;5g(BNkr0sxkr)MQ5#VQQ5R7UQ6JF&(GbxH;frXDXo6^pXohHxXn|;nXoYBvXoF~rXoqNz@I&|` zIv@fN9T9Q!L5RVKA&8-fVTj>~ z5r~n9QHarqF^I8dU!2E<0hCd6jM7Q|M>HpF(s4#ZBx zF2ruc9>iY6KE!^+0mMPXA;e+C5yVl%F~o7i3B*anDa2{S8N^w{ImCIy1;jtzvAQB=HArd2!Ad(`I zA(A6fAW|YyAyOmKAkrezA<`oy5ls+H5zP?I5iJlc5v>rd5p57{5$zD|5q=1NLi3Uu?Vpk zu>`Rcu?(>su>!FYu?n#ou?Ddgu@12wu>r9Wu?evmu?4Xeu??{uu>-Lau?w*qu?Mji zu@A8yaR6};aR_l3aRhM`aSU-BaRPA?aSCx7aRzY~aSm}FaRG4=aS3r5aRqS|aSd@D zaRYG^aSL%9aR+f1aSw4H@c{7<@d)u4@dWV{@eJ`C@dEJ@@e1)8@doi0@ec7G@d5D> z@d@!6@dfb}@eT1E@dNP_@eA=A@dx444$nV`Ac&xdV2I#|5Qvb7P>9foFo>{-aES1T z2#AP?NQlUYD2S+tXo%>D7>JmNScuq&IEc82c!>Cj1c-!)M2N(QB#5MlWQgR56o{0F zREX4wG>Ei_bcpnb42X<~Oo+^gEQqX#Y>4cL9EhBVT!`F=JczuAe2Dyr0*Hc$LWshM zB8Z}hVu<315{QzBQi#%sGKjK>a)|PX3W$n`N{GscDu}9xYKZEH8i<;RT8P?+I*7W6 zdWian28f1;MhIU-V?+}~Q$#aFb3_Y7OGGO~YeXAFTSPlVdxRgtAJG93far(_M07%Q zMsz`RMRY@SNAy7SMD#-RM)X1SMf5}TM+`s=L<~XM~py>M2td=MvOs> zMT|p?M@&FWL`*_VMod9WMNC6XN6bLXM9f0WM$AFXMa)CYM=U@rL@YuqMl3-rMJz)s zN31}sM65!rMyx@sMXW=tM{GcBL~KHAMr=WBMQlTCN9;iCMC?NBM(jcCMeIZDM;t&L zL>xjKMjSyLMI1vMN1Q;MM4UpLMw~&MMVv#NM_fQ$L|j5#MqEK$MO;H%N8CW%MBGB$ zM%+Q%MchN&M?640L_9(~Mm#}0MLa`1N4!A1M7%=0M!Z41MZ812M|?nhM0`SgMtnhh zMSMeiNBltiMEpYhM*Knew8!%gA_yWVA{Zh#A_O8NA`~JtA`BudA{-(-A_5{JA`&7p zA_^iZA{rt(A_gKRA{HVxA`T)hA|4_>A^{>HA`v1nA_*cXA{in%A_XEPA{8PvA`K!f zA{`<DrA`2obA{!z*A_pQTA{QbzA`c=jA|E0@q5z^Gq7b4mq6nfWq8Oq$ zq6DHOq7k%6e8xfljn-NIe*L^MM9A{rx_ zAethYA(|svAX*|?AzCBaAlf3@A=)GS5dMe`hyX-KL?EIQqBEikqAQ{sqC27oq9>vk zqBo)sqA#K!qCa8)VjyA=VlZL|Vklx5VmM;NzkpA&jKnqyF&Z%jF%~fnF&;4iF%dBd zF&QxhF%>ZlF&!}jF%vNhF&i-lF&8lpF(0u2u@JEcu^6!gu@tcku^h1iu@bQgu^O=k zu@`$BdFBP?Mdl^uW#$#;RpvG3b>3UFJRJedYt^L*^sqW9Ad)Q|2?~bLI=?OXe%)Yvvo~Tjo3Fd*%n`N9HHyXXY2? 
zSLQe7cjgb~Pv$S?Z{{DSk3YRq{$U1T24x0g24{v~hGd3fhGvFghGm9hhG#}#Mr1}} zMrKA~MrB50MrX!g#$?7~#%9K0#%0E1#%CsACS)dJCT1pKCS@jLCTFH#revmKre>yL zre&sMre|hgW@Kh!W@ct#W@Tn$W@qML=49q#=4R$$=4Iw%=4TdQ7GxG;7G@S<7G)M= z7H5`VmSmPmS~Ln4RbAX9dkW%19KyD6LT|j3v(-T8*@8z2XiNL7jrjr4|6YbA9FwR0P`U85c4qe z2=gfO81p#u1oI^G6!SFm4D&4W9P>Q$0`nsC67w?i3iB%S8uL2y2Jw1@k5I74tRo4f8GY9rHc&1M?&E6Z13k3-c@U8}mE! z2lFTM7xOps57VaupMRM_m_eDrn8BGLm?4>=n4y_rm|>aWnBkcbm=T$gn30)Lm{FP0 zn9-Rrm@%2Ln6a60m~ol$nDLnjmHRwn3*m^qobn7NsGn0cA`nE9Cnm<5@In1z`|m_?bzn8leTm?fE| zn5CIzm}QyenB|!jm=&3on3b7Tm{pn8nAMpzm^GQTn6;U8n01-;nDvUoX4EcT)+``<-+{WC_+`-(*+{N6@+{4_<+{fI{Jit82Jj6WAJip5i>C}2{S1(88bOE1v4cx6*Dz64Kpn>9Wy;M12ZEt6Eib2 z3o|P-8#6mI2Qw!#7c)0A4>K<_A2UC*0J9*o5VJ6|2(u`&7_&ID1hXWw6tgt546`h= z9J4&L0<$8s60nVp!OnO&G&ncbM(nLU_2nZ203nSGdj znf;jknFE*unS+>vnM0UEnZuaFnIo7ZnWLDanPZq^nd6w_nG={3nUk24nNyfknbVll znKPI(nX{O)nRA$Pne&+QnG2W;nTwcnYWm?nRl3XnfI9YnGcu`nU9!{nNOHc zna`NdnJ<_xnXj0ynQxeHneUkInID)RnV*=SnO~S+nctY-nLn66nZKC7nSYo*9r^sr z48jb`48{!348aV^48;u148sh|495)5jKGY@jKqx0jKYk{jK+-4jKPe_jKz%2jKhq} zjK_@6Ou$UYOvFsgOu|gcOvX&kOul z%)!jb%*D*j%)`vf%*V{nEWj+tEW|9#EW#|xEXFL(EWs?vEX6F%EW<3zEXOR*tiY_u zti-I$tir6ytj4U)tii0wti`O&ti!C!tjDa+Y`|>DY{c|sHfA?swrBb={h1w@0nCp7^54&f{`Z4!f!sSWJ2Sg5yE3~myEA(*dop`5do%km z`!f46`!feH2QmjS2Q!B-hcbsThciboM>0n-M>EGT$1=w;$1^7|Co(57Co`uor!uE8 zr!!|TXEJ9oXEWz8=Q8Ip=Q9^D7cv(y7c-YImok?zmorx|S29;IS2Ncz*D}{J*E2UT zH!?RdH#4^|w=%aew=;JzcQSV|cQf}e_cHe}_cISL4>Aui4>ON2k1~%jk26m&Pclz2 zPczRj&oa+3&oeJDFETGNFEg(&uQIPOuQP8jZ!&K&Z!_;O?=tT(?=v4TA2J^?A2XjY zpE92@pEF-DUou}YUo+n@-!k7Z-!nfjKQcctKQq5DzcRluzcYU@e=>hDe>49ueFFIe z%nZT|$_&N~&J4i}$qdB|%?!f~%M8a1&y2u~$c)5{%#6Z}%8bU0&Wyo~$&AH}&5Xm0 z%Z$g2&rHBf$V|jc%uK>e%1p*g&P>5f$xOve%}m2g%S^{i&&h z&dkBg$;`#f&CJ8h%go2j&n&MXM%SbP_#y(`HI$9v?ii86|I?Q%|&Y=T1(MdiPl=QHlno^t(|D? 
zMe`HQU$hRQ1&G#Bv_R21iPl-PE~0f6t($1wMe8A2Ptkgb)?2hbqV*N6pJ@F>8z9<1 z(FTb&ShOLc4Ha#eXv0MtA=*gMMu|3Bv@xQM6>Xeo<3*bw+CXhp>qXlj+D6efiMCm^Euw7|ZJTJ@McX0TPSJLWwp+A4qU{xJpJ@9mXWyXdOih z6s?nJoki;+T36A!iPl}T9-{RWt(R!MMe8G4U(x!B)?c&%q74*nkZ6NN8zS0J(T0gO zT(l9QjTCK^Xro0NBidNe#)&pwv6q+E&rFiMCy|9ir_NZI@`fMcX6VUeWf6wqLXZq8$|NkZ6ZRJ0jXq(T<6BT(lFS zofPeqXs1OxBidQf&WUzjvxOiS}KzAENyf?U!i3Mf)R~PiJxePqZMS1r;rrXu(AbAzDb$LWveyv@oKD6)l`- z;YEueT13$zi56M3D56CbEt+W2MT;R?OwnSA7F)D9qQw<0o@nt!OCVZ8(GrQ4ShOUf zB^521XvsxOAzDh&Qi+yYv^1in6)l};=|#&RT1L?_iI!QkETUxqSY0xo@n(&Yam)f(He>7D_UdGnuyj^ zv}U3;7p;Y8Ek$c3T5HkTh}KrLcA~Wx%}+Fc(K?70AX-P!0!8a2T4&L^h}KoKZlZM; zt%qnmMe8M6Z_)aQ)>pKCqV*STfM^3n8zkCb(T0dNRJ38D4Hs>MXd^`%CE94w#)vjn zv~i-17j1%Q6GfXO+GNqDh&ENUX`)RRZH8zwMVlqsY|-Y3HdnNHqRkg=foKawTO`_I z(Uyp|RJ3KHEf;NtXe&iqCE9Ay)`+%Nv~{Ab7j1)R8%5hB+Gf$Vh_+R1ZPD(Cc2~4}qTLtmfoKm!dnDRp(VmF* zRJ3QJJs0hTXfH*3CE9Dz-iY>Aw0EMt7wv;+A4U5l+Go+ei1t;qZ=!t{?T2VTMf)Y% zZ_)mU=F>&o{}U~UXhB5_CR%XOLWmYpv{0gj7A=fuVMPlkT6obSh!#<_NTNj+EsAJS zMT;g{bkSmn7E`oXqQw?1j%aa3izixq(GrN3P_#s%B^E7-Xh}s&CR%dQQizsPv{a&{ z7A=ivX+=vXT6)nkh?Y^bOrm8LEsJPbMaw2ycF}T(mQ%D`qU9DXk7#*C%O_fX(F%xG zP_#m#6&9_CXhlUUCR%aPN{Cicv{Is#7OjkEWko9|T6xhbh*nXwN}^R3t%_(>MXM%S zbxrMNFIof98j99PG+)sgi`GQ6rlK_yt+{9| zL~AKpE74ku)<(3pqO}vPy=Z=-`HR*;v;fgMiWVqZC($~K)nU0< z(Rz#4N3_18^%JeXXahtWDB2*=28%XCw4tI66K%L?BSaf1+9=URi#A5Iv7(I=ZMQRx@a>*n?7K^q-w56gg z6K%O@D@0o<+A7gji?&9zwW6&PZM|q4MB6CZCeb#Fwnen9qHPmxyJ$N^+bP;E(RPcr zN3^}7?GtUkXa__)DB2;>4vTg~w43 zCDAU6c15(SqFod1x@b2U>qJ0zXyJ$Z|`zhKl(SD2eM>LXLmO-?PqGb{-vuIgF%PLwn(Xxw{L$sWtWkJuw1%QJ63th%#-cS5t*L0uL~AZu3(;DN)=IS2qO}pNt!V8; zYcHCgX#S#g5G_Eoj-myM)=9L^qID6it7zRs>n>Uk(Rzy3OSImi^%1SFX#GU%FWLan z28uRFw85ec5pAew!$cb{+6d7`iZ)8L(V~qJZLDbHL>n*K1komnHc7O}qD>KPs%X*qiC5#%Pd+J z(Xxt`O|WWrRwECho z5UrtTjYRVmt+8lLL~ANqGtruh)nd6|(YlM)L$scv^%AYOXnjQMD_TF%`inL|w1J`x5^b<(Lqr=Y+Az_E zi#9^Ek)n+fZM0}(L>nvGIMK$7HbJzBqD>NQvS?F8n=0Bg(WZ+wL$sNq%@S?4XmdoH zE80BK=8LvKw1uKA5^b?)OGH~L+A`6Wi?%|vm7=W@ZMA4?L|ZG`I?>jPwn4OwqHPjw 
zvuImH+bY^N(YA}WL$sZu?GkObXnRE4E80HM_KS8vw1c7@678^PM?^a++A-0Ni*`b^ zlcJpx?X+lTL^~_mInmCGc0sg@qFoa0vS?RCyDHi>(XNYjL$sTs-4gA#Xm>=rE80EL z?u+(7w1=WS678{QPegkv+B4Cfi}pgam!iEA?X_rcM0+dRJJH^Y_Cd6dqJ0wWvuIyL z`zqQu(Y}lJL$sfw{SxiBXn#cW=`Qa7i55h(prQp6Ex2eQL<=ceDA7WT7DlwNqJw78$>nvIq(YlJ(O|n#I7}3Ux zHcqthqD>HOqG*#un=INC(WZ(vO|=885?wE3bf5N)Aoi$q&2 z+7i*0indI&<)W<+ZKY_dL|ZM|8qwB@wobJ5qHPduqiCB%+br4^(YA`VO|yyDZuj(XNViO|9 ziuO#j=c2t3?WJh1M0+jT8`0j1_D;0-qJ0qUqiCN*`z+cQ(Y}iIO|5iPE0@kEO+S_07$ik3*U#G)k;Evab9L`yDO3ei%EmP)kL zqNNcnt!U{)OD|dm(K3pbNwmzOWf3i_XxT)|E?N%Ja*CEqwA`ZQ5iPH1`9#YvS^?1t zidIOp!lD%st?0ka`y1AZiJvSkS_#ogidIUr(xR0St*mI}L@O^^1<@*sR!OwVqE!*C zs%X_jt1em%(Q1lTOSIae)e)_(X!S&^FIof98j99PG+)sgi`GQ6rlK_yt+{9|L~AKp zE74ku)<(3pqO}vPy=Z=-`HR*;v;fgMiWVqZC($~K)nU0<(Rz#4 zN3_18^%JeXXahtWDB2*=28%XCw4tI66K%L?BSaf1+9=URi#A5Iv7(I=ZMQRx@a>*n?7K^q-w56gg6K%O@ zD@0o<+A7gji?&9zwW6&PZM|q4MB6CZCeb#Fwnen9qHPmxyJ$N^+bP;E(RPcrN3^}7 z?GtUkXa__)DB2;>4vTg~w43CDAU6 zc15(SqFod1x@b2U>qJ0zXyJ$Z|`zhKl(SD2eM>LXL zmO-?PqGb{-vuIgF%PLwn(Xxw{L$sWtWkJuw1%QJ63th%#-cS5t*L0uL~AZu3(;DN)=IS2qO}pNt!V8;YcHCg zX#S#g5G_Eoj-myM)=9L^qID6it7zRs>n>Uk(Rzy3OSImi^%1SFX#GU%FWLan28uRF zw85ec5pAew!$cb{+6d7`iZ)8L(V~qJZLDbHL>n*K1komnHc7O}qD>KPs%X*qiC5#%Pd+J(Xxt` zO|WWrRwECho5UrtT zjYRVmt+8lLL~ANqGtruh)nd6|(YlM)L$scv^%AYOXnjQMD_TF%`inL|w1J`x5^b<(Lqr=Y+Az_Ei#9^E zk)n+fZM0}(L>nvGIMK$7HbJzBqD>NQvS?F8n=0Bg(WZ+wL$sNq%@S?4XmdoHE80BK z=8LvKw1uKA5^b?)OGH~L+A`6Wi?%|vm7=W@ZMA4?L|ZG`I?>jPwn4OwqHPjwvuImH z+bY^N(YA}WL$sZu?GkObXnRE4E80HM_KS8vw1c7@678^PM?^a++A-0Ni*`b^lcJpx z?X+lTL^~_mInmCGc0sg@qFoa0vS?RCyDHi>(XNYjL$sTs-4gA#Xm>=rE80EL?u+(7 zw1=WS678{QPegkv+B4Cfi}pgam!iEA?X_rcM0+dRJJH^Y_Cd6dqJ0wWvuIyL`zqQu z(Y}lJL$sfw{SxiBXn#cW=`HU6i55h(prQp6Ex2eQL<=ceDA7WT7DlwNqJw78$>nvIq(YlJ(O|n#I7}3UxHcqth zqD>HOqG*#un=INC(WZ(vO|=885?wE3bf5N)Aoi$q&2+7i*0 zindI&<)W<+ZKY_dL|ZM|8qwB@wobJ5qHPduqiCB%+br4^(YA`VO|yyDZuj(XNViO|9iuO#j z=c2t3?WJh1M0+jT8`0j1_D;0-qJ0qUqiCN*`z+cQ(Y}iIO|5iPE0@kEO+S_07$ik3*U#G)k;Evab9L`yDO3ei%EmP)kLqNNcn 
z?Z3_Y8`jc^pG+@W2GKH#mPxeCqGb^+t7zFo%Pv|D(Q=BGOSIggZE*g843&W^2%W9#bJ zx;eJ)j;)7d>*?5fIkw)8t&d~t>)84^w*HQ7fMXlz*akVa!H#W+V;kz&hB>z3j%|cv z8|m0aIkwS`ZH!|Z>)6IQw(*W_f@7QL*d{r)$&PJ`W1H&Ora89hj%|ixo9WnQIkwr3 zZH{A`>)7Tww)u{2fn!_f*cLgq#g1)>V_WLjmN~ZNj%|fwTj|(VIkwe~ZH;4F>)6&g zw)Ku}gJaw1*fu$~&5mu0W83Q3wmG)#j%|ly+v(VLIkw%7ZI5Hy>)7@=w*8LnfMYx8 z*bX_i!;bBUV>{~DjybmDj_rhFJL%X?IkwY|?TlkP>)6gYw)2kdf@8br*e*G?%Z}}e zW4r3ut~s{rj_rnHyXn|&Ikwx5?T%x+>)7r&w)>9lfn$5<*d95y$BylZV|(h@o;kMX zj_rkGd+FF-Ikwl1?Tur5>)75ow)c+hgJb*X*giS7&yMYjWBcmZzB#t1(f*|8Z)4MRsga99vY!7R|9mcWf~nTTI6m%dy3FY;hc0T*nsAvBh_62^?EO$Ck*kC3b8{ z99vSymdvpwcWfyfTS~{4%CV((Y-t=@TE~{ov88ux85~?99ve$md&wc zcWgNvTTaK8%dzElY=O z%CVJpY-JovDJ5M4IEoT$JWTP`8u}7j;)DfYwFmVIkx7Gt%YN2>DXF1w$_fVjbm%;*xEU^ z_KwZZvH3f;4u7}MK0ZE`e4GB~?LRwpv{Rs+I@zhSox0emtDU;psk@ze*r}(TdfBPB zo%-0RubukYslS~D*lD1h2H9z_orc(HsGWw{X}Fz6*lDDlM%ih!oyOQ{tewW$X}p~# z*lD7jCfRASou=4ns-33UX}XA0Ov*y*I5PTA?SozB?ltewu; z>AamT*y*C3F4^g_ovzsFs-3Rc>AIb6*y*O7ZrSO!o$lD_uAT1L>AsyF*y*929@*)! zou1g~shytL>A9U=*y*L6UfJojo!;2#t)1T4>Ajsk*y*F4KH2HBoxa%VtDU~t>ARhN z*y*R8e%a}_o&MO#r=Phv{par##pi#nSE~j1w(scg*Rk3EUbqGkEvRV0L<=rj2+=}{ z7D}|xqJ5iPE0@kEO+ zS_07${%sNeeI5P(FQP@roTwWo!SIFZP@_L1QULn6%DBu+edWAw> zp|Dpd;uVT|g<@WzxK}9Q6-s)AQeL67S198Z%6f%zUZK2KsNfYUdWA|}p|V$~;uWfT zg=$`*x>u;-6>55gT3(^HSE%C^>UxEGUZK8MXy6qZdWA+_!PhG^_6kkBLQ}8M%quka z3N5@sORvz%E420sZM;HTuh7mbwD$^rUcuihbnprRUZJB`2=oe_yh3NM(8Vis^$Oj* zLU*sw!z=Xk3cb8SZ?DkDEA;gW{k%eduQ1?m2>tIhTrJQ_ zb{cA@VRjmBrxA7jcA9FZX?B`!rx|vd zX{T9snr){!cA9Iad3Ks_rv-LeXs1PXT5P8!c3NtuWp-L_rxkWuX{S|oT5YE_c3Nwv zb#_{Brww-6Xs1nf+H9vScG_yEZFbsjryX|MX{TLw+HI#jcG_#FeRkS!rvr96Xs1JV zI&7ySb~~Srw4X=Xs1VZdTggBc6w^3XLfpSrx$j5X{T3qdTpmS zc6w{4cXoPjrw?}eXs1th`fR5!cKT|kZ+7}_ryq9uX{TRy`faB_cJk?OE~EajQxH1^ z{W}Hwd;QhaFEAh&DL5$vDI_TrDKsezDJ&@*DLg3xDIzHnDKaSvDJm%%DLN?zDJCfv zDK;q%DK04}-DLp9zDI+NpDKjYx zDJv-(DLW|#DJLlxDK{w(DK9A>DL<(Iso-A)|NA;>(X3s&fI`f|%p%O9%wo*q%o5C! 
z%u>wK%reZf%yP`~%nHnk%u3A4%qq;P%xcW)%o@y^%v#La%sR}v%zDiF%m&Pc%tlOK zW@Ba(W>aP}W^-l>W=m!(W@}~}W?N=EW_zX|)1TRa8Nlqw3}kjA_Gb2B_GR{C_Gb=Y4rC5u4rUHv4rLBw4rh*Fj%1Evj%JQwj%AKxj%Q9_PGnAE zPG(MFPGwGGPG``?q%*{?q?og9%LS39%de49%UY5 z9%r6lo@Ab4o@Sn5o@Jh6o@ZWQUSwWkUS?ilUS(cmUT5B5-elfl-e%rm-euln-e*2w zK4d;(K4v~)K4m^*K4-pQzGS{)zGl8*zGc2+zGr@5eq?@PerA4Qer0}RerNt*{$&1Q z{$~DR`V64U>3^6(m_eDrn8BGLm?4>=n4y_rm|>aWnBkcbm=T$gn30)Lm{I@bzfUIn z?`b0%_vp+R%$Uqr%-GC0%(%>W%=pX%%!JHD%*4zj%%sd@%;d}z%#_Sj%+$!v0cn_N zndzA6nHiWFnVFcGnOT@wnc0}xnK_s_nYoy`nR%FbnfaLcnFW{ynT42znMIgInZ=mJ znI)JdnWdPe|MGwCFaCQj@BBaa7t7eGtewi)sl1&k*r}qOD%q*BovPTWs-3FYsk)tN z*r}$SYT2o_o$A=BuAS=HslJ^W*r}nN8rjL$PL1u<#7<4^)XYxJ?bO0fE$!6GPOa_K z#!hYR)Xq-r?c`@Ce>-)sQ-Ga1+9}XZo$S=vPF?KO)lS{))ZI=!?9|gvz3kN6PJQgu z*G~QH)Zb16>@?6$gX}cePDAW8)K0_fG~7-j>@?C&qwF-=PGjse)=uN>G~P}V>@?9% zlk7CvPE+hO)lSp?PT~Idec1o``@YJ*_cf+F*BQ=rrgNR;TxUDiInH&ibDig0=R4O0 z&UK-4UF2LBJJ%)7b*Xb*=3JLM*A>onrE^{7Tvt2SHO_Udb6w|L*E`n@&UK@6-Q-+1 zJJ&7Fb*ppT=3KWs*B#Dvr*qxqTz5OyJ*;uJJ%!5^{8__ z=3I|E*Ave5q;oyxTu(dKGtTv_b3Nx=&pX!(&h?^mz2sakJJ&1D^{R8d=3K8k*Bj3D zrgOdJTyHzqJI?j4bG_$W?>pBA&h?>ledJspJJ%=9^{I1x_IKs;%yT=vu+vLBy|UA5 zJH4^fTRXk8(|bF8u+v97efrPz-{(1ew$m3oeYMj!JAJp)4?F#|(=R*yw$mRw`3(Hu z-?ab!5B+1OAa)9Br(kvpZl@4-3TdZMb_#8$Fm?)Sr*L)(Z>I=$ifE@uc8YALD0Yf! 
zr)YMHZl@S_ifN}I!yN@%AN@=H5 zc1mrhGJ1)%4nxdcFJt0EOyFjr)+l0Zl@e}%4w%ucFJw1Ja)=!r+jwG zZ>IuwDrl!db}DSAB6ccjr($+0Zl@Ceng08+fRc79Wv9}1Dr2Xzb}DD5@^-3Vr;2u} zWT(n@s$!?AcB*El>UOGOrD zcRTg4Q%^hfvQuw6^|4c5JN2_ue>)AZ(?B~7veRHY4YAWuI}Nkba665#(?~mwveRfg zjj_{MJB_o`csot7(?mN>veV@MoBr=M%@jLLwbL{^O}EnwJI%DyEIZA%(;Pd^wbML1 z&Hpe(T1Q$>+CbV!+C^_a zJJ&PL^{jI}=UmVKU4#AS&;JF|MbahGWzrSWRnj%mbzKUVp|{L1{s{LcKr z{K@>q{LTEs^ch6|RsS%9FoQCKF@rNhFhep!F+(%MFvBv#F~c(>Fe5S}F(WghFrzY~ zF{3kMFk>=fF=I31Fyk`gG2=56FcUHpF%vVBFq1NqF_SY>FjF#9F;g?sFw-*AG1D_M zFf%eUF*7r>FtakVF|#vsFmp0^EXF!M6=G4nGEFbgsZF$*(`FpDyaF^e-xFiSE^ zF-tScFv~K_G0QV6Fe@@EF)K5xFsm}FF{?9cFl#bvF>5pHFzYhwG3zrMFdH%(F@2eh znN65Yna!BZnJt(tnXQb|f9oqoMHqfyRa%_Vg+YrY# z)Uge7Y{MPf2*)o2u9oq-T_R+C@a%`U++ZV_7)vtaIPW$uEGBE)@UeF zXi^wbSW-Arcv1vXL{cPDWKtATR8lljbW#jbOj0aTY*HLjTv9wzd{P2ZLQ*19Vp0-P zQc^Nfa#9LXN>VCPYEl|f+P~tbAEvWYdOKyXQ${;wvQuU|WwBFMJ7u#|c01*;Q%*bO zvQut5<*`#X`!7K*=ezzme^^jotD{Yxt&(nX{DW3*=e<%*4SySoz~fDy`47LX``Js z*=e(#w%BQ_ownI&yPbB}X{Vib*=e_(_Sk8!o%Y#jznu=)>7bns+3B#Ij@ap_osQY* zxSdY?or3>8*Zt2^0Z%edF;6qkFwZj2G0!tEFfTGMF)uT(Ft0MNF|RXkFmEz%F>f>P zFz+(&G4C@UFds4>F&{IZFrPA?F`qMEFkdoXF<&#^FyAuYG2b&kFh4RsF+VfEFuyXt zF~2i^{7ZWB%unvWn7^5Sm_9@3#qkd_2s0=%7&ACC1T!Qv6f-n43^Ob<95XyK0y82r z5;HP03NtD*8Z$aG1~Voz7Be<84l^z@9y2~O0W%>p5i>C}2{S1(88bOE1v4cx6*Dz6 z4Kpn>9Wy;M12ZEt6Eib23o|P-8#6mI2Q%kiy1w@;msiN`74mq6yj~%nSIF-b3V4Nr zUZIdzDC`xAc!i=~p_o@F?iEUSg_2&OlvgP270P&pvRTWmRG3l73z3}x?Z84SE%n58hC|kkpCPnbd{UmDG*Y zoz#QWlhljUo79KYm(-8cpEQ6pkTi%im^6emlr)SqoHT+ok~E4mnly$qmNbquo-~0p zku>SA{&QOF959(Vg*lZujX9k;gE^Bq>o5P_h5GN~31-`Aj-BS(X`Y?t+i8KF7TRf% zofg|^iJg|(X_=jt+i8WJR@!NmomSgvjh)uoX`P+c+i8QHHri>Eoi^KPi=DRGX`7w4 z+i8cLcG_u|op#%4kDd0~X`h|;+v$Lv4%+FEoetaSh@Fnw>6o34+v$XzPTJ{|ole{7 zjGfNf>71R;+v$RxF52mmoi5wyik+_7>6)Fc+v$d#ZrbUVoo?Idj-Bq>>7JeL+v$Ow z9@^=VogUliiJhL>>6x9L+v$a!UfSuEonHT)JT5igi1t>rccQ%)?Sp6^Mf)V$XVJch z_EofRqJ0`=WR_xXI5ZVWL9EUW>#TVWmaQWXVzfW zWY%KVX4YZWW!7WXXEtCqWHw^@G8;3SFq<-)F`F}6Fk3QPFJI 
zIJTjVZJ1*l?$|~+wvmo)lw%w1*v2@vv5sw=V;k?-COEc)E=_vK_E&2QAwn%XZkZ9kFajE!#26cHFX^uxuwS+bPR- z+OnOoY-cUoIm>q5vR$xj7cJW*%XZnaU9oIeE!#EAcHOewuxvLi+bzp>+p^uUYIhA&=7` zuhSu)(;>gpp@7q&pwpp{)1k1_p@`F=sMDdC)1kQ2p@h?+q|>34>)?r3HV(T>R?VC z&8d?)1)Ec6a|$u1F6PwLoVuA)cXR4tPCd=3mpSz|r#|M?*PQy9Q-5{*PP~=(|mJUU``9oX^}ZCHm4=#wA7rInbUG}T47Er&1sc6 ztv07M=CszF)|u0KbJ}1|8_j8xIc+wlE#|b5w@cHm4)zbkv-VnbUD|I$=&H&FPdmoi?X4=5*Ga&Y9DBbGl$o7tQIC zIbAlVE9P|7oUWPEb#uC5PB+czmO0%vr#t3!*PQN|(|vP#U``Ls>5(}-Hm4`%^wgZ5 zxu*Z`yO+-`+Y8I~(z3m>Y_Bcb8_V|Avc0ox?=9O0%l6T-eX?wyE!!8%_SLd|vuxij z+Yih3)3W`tY;Hr%-=k2L&E2wvwrn1jEsSLgYuUnCw(yqC)3SM4wg{FjqGgL@*&C*%Di}B$h3yWlLt+ zl3TVEmMx`aOJ&(oTedWoEv;osXW7zQwhWdnqh-rv*)m(UES4>+Wy@ySye*rLW%IRc z*)5x&Wy@jNa$2@rmMyns%VXK{TDE+aEx%0KWvgM?YFf5hmaVpB zt7F;fTDE$Yt-fVzVA&d4Hh;_3$g(xIY)vd%Q_I%OvNgACEi7A0%ht-WwYF?+EL(tO zYirrsS++pS*50xOS+)+At)pe@WZ8l(TW8A_V%fS_wyu_~n``rQ{UjUzkiag*{X29F z>TXUw%&Dh2^)jd4=G4cW`kGTebLwwS1I%flISn$W!R9o?oQ9gyFmoDiP9w}|q&bZ; zr_ts##+=5Q(>QY)Z%z};X`(qzGN;MrG{u~zn$t9Mnr==r%xR`M%`&Ii<}}Bg=9<$y zbDD2X3(RSuIW01$#pblcoR*r?GILsPPAkl5r8%uKr`6`P#+=rg(>ilnZ%!M`X`?x9 zGN;Yvw8fmZn$tFO+HOue%xR}N?J}p`=CsG0_L|c^bJ}lC2h8c9IUO>m!{&6voQ|5) zG1uhv`|)1>Wt-!c?Sy4JY1vL$w$qmFjAc7(+0I$E^Oo&`WxHtEE?KtAmhFmVyK334 zS+?tz?S^H$Y1wXBw%eBNj%B-R+3x+`e*YvdFa8eSxBM?3Shk0j?U7}BY}uYzwx^cu znPq!!*HWpf*5K1zhLZ0?pVv}N0Sj!g9vW2&7o|etavPH0L5iMIJ%NE(PMX_vA zEn76p7TvPNuxv3cTP({K+p@*6Y;i4HJj)i}vL&!=2`yV9%a+)(C9!NtEn70nmfW(X zuxu$UTPn+z+OnmwY-uf9I?I;cvSqMr87*5T%a+-)WwC5oEn7Cr=55)0ESs-o%Wm2H zEL#rCmeaE3vTV66TOP}n*RtiaZ22u)0n1jEn5xCR@1W8vTU_2TOHTt z@FO+q{>QD}f86T-$F0GC+#3GJ&Hq1cjsD}-_&;t<{^Qp4KW@$bG&awIv598HcP$CBg7@#F+@A~}hiOim%ElGDiP zbmQr^64Y!%wHfFQEP#d$l-Lz>2%2Dbja;=$m4X#>vYKHbja^?DByG` z=yWLLbSUg}DB^S|>U1dPbSUn0DB*M{>2xUNbSUj~DC2Y}>vSmRbSUq1sNi&{=ya&$ zbg1lfsN!^}>U5~)bg1rhsNr;|>2#>&bg1n*{CT_&2&zNYCF_y($p&OY(w}TZHYS^p zP040tbFu~5l59n`Cfkq!WLvTw8A!G#gUAkKN3s(cOm-$i$S!18vK!f*>_PSogBsY

*_;AGx1AKprFyk%!46G02!?EHXA3hm1?cBjb|^$b@7fGBKHiOiCsrlandPlw>M0 zHJOG?OQs{!lNrd2WF|5*nT5yr)0hNM5)h-^$YA)AuT$mV1VvL)GyY)!Ty1IV^yJ2H@LPX>`4$c|(uGMMa4hLByz zu4Ff|JK2NmN%kUplYPj(WIwV$Ie;8U4k8DWL&%}zFmgCKf*eVXB1e;B$g$)&ay&VK zoJdY0CzDgispK?rIyr-!NzNi?lXJ+qRBHiXxJGq10N$w(dlY7X$r{B2SZN$g|`*@;rHgyhvUmFOyfutK>EEI(dV@;&*1{78NxKa*d`ujDuKJNbkBN&X_;M)LVD=}v|w zJ;*R*STY{nVZZ* z<|Xry`N;xgL9!58m@GmTC5w^8$r5BqvJ_dGEJKzh%aP^D3S>pH5?Pt7LRKZKk=4l> zWKFUbS(~gw)+Ota^~nZgL(-pYL^dXykWI;EWOK3w*^+EUwkF$<0c2aU9T`ZrCxgfi zWJj_S8BBI2L&z>96$~v2a$uxA>>eU7&)9AL5?Ix zk)z2mgpuah^( zo8&F>HhG7s4`RqstQ$uszWuPnoupMHdF_y3)O?_Lk*yYkU!K2 zY78}jnnKN>=1>c$CDaOP4Yh#+ptevuC=hB71wkF4j!-8k80rj#KwY4&P&cSM)C1}X z^@4gseW1QjKd3)602&Alf(AoFprOz(XgD+i8VQYpMnhwuvCueZJTw8C2u*?}LsOus z&@^Z|Gy|Fm&4Ok_bD+7+_XRkUJC_@_@oXVWDtP zc*qm-f+9c>p-50OLNT5ilNTf)t zNTNupNTx`xNTEonNTo=vNTW!rNT*1z$e_rm$fU@u$fC%q$fodC_$Yi8*%f|@9EzNZ zT#DR^Jc_)Ee2V;v0*Zo)LW;tQB8sAlVv6F55{iAOfDHgjB@&7)bb`0`w+aa)Bho(VGtm{(iy3D#Rx2`L!>q_go%DS$$ zu4}C8TI;&bx~{jb8?5U_>$=IhZnmyltm{_my3M+7x2`*^>rU&s%ewBiu6wNOUhBHg zy6(5G2dwKs*XsI5Xa4Cr_Je%Ih{49bLMp3 zoGzHtMRU4jPM6K;iaA|1r)%bP-JEWi(@k@_Wlp!v>5e(wHK%*#bl;pFnA1aZdSp(I z&FP6bJvFCi=Jed0UYOHMb9!Y?ug&R=IlVQfcjolooIaS-M|1jQPM^)`i#dHYr*G!; z-JE`y(@%5yWlnBmj7O7D=HzZpq0Py|oWht>SaS+zPT|eT)117_DS|mgG^a@B6xp1j zm{U}9ie^sH%_)XC#Wbf_<`mnU;+RuhbBbq9@y#iLIVCiwMCO#(oRXMRQgcdXPRY$F zg*l}(r&Q*Y+MLpuQ(ALMXHMx|lj|itA+CSmEhqz-k<3J9CbN)P$!w%I=|lRG*-1Y# z2bq)1Mdl{+ka@{`WPY*$S&%G57AA|3Mag1hak2zik}O4*Cd-gz$#P_QvI1F=tVC8O ztB_U6YGie?23eD=Mb;+kkafv=WPP#$*^u-n8Cnhv<_MiZGbjHo1o3m7HBK94cZRv zfObN=pxw|OXfL!6+7BIo4nl{Z!_X1vD0B=u4xNBbLZ_h9&>83~bPhTXU4Sk^m!Qkg z73eB-4Z059fNny!pxe+L=q_{*x(_{o9=i0;*Rjo;1_T5>A|I1a$fx8p@;Ui}d`Z3{ zUz2ahx8ytWJ^6wBNPZ$elV8ZM{nVZZ*<|Xry`N;xgL9!58 zm@GmTC5w^8$r5BqvJ_dGEJKzh%aP^D3S>pH5?Pt7LRKZKk=4l>WKFUbS(~gw)+Ota z^~nZgL(-pYL^dXykWI;EWOK3w*^+EUwkF$<0c2aU9T`ZrCxgfiWJj_S8BBI2L&z>< zSF#(~o$NvOBzuv)$v$LXvLD%>96$~v2a$uxA>>eU7&)9AL5?Ixk)z2mw~z9rw0@5vA3NAeT-nfyY2CBKp1$sgoT z@)zkgPM!e=2ZbWt$Ix;<(fy_u|A~Ta&$gE^G(wp=l 
zeaY;kADM&9N#-JRlX=LzWIi%KS%54^79tCiMaZILF|s&Wf-FgvB1@BH$g*TPvOHOV ztVmWOE0a~os$?~?I$49PN!B83lXb|tWIeJz*???F`jd^w#$*$+DcOu{PPQOhlC8+r zWE(PoY)iHy1IhMe5ZQt3NOmHF$n_HG&zPGOO7MQlM~2^lEu18x$KAn-rTBTNGOr z+Z5XsI}|$=yA-<>dlY*W`xN^X2NVYthZKhuM-)dD#}vmECln_Yrxd3ZXB1}@=M?7^ z7Zev2mlT&3R}@zj*A&+kHxxG&w-mP(cNBLO_Z0UP4-^j-j}(s;PZUoT&lJxUFI@Qd z1&J@oSLAE*4f&RQN4_UNkRQoU0csAM!UIvInENyZ{$lX1wnWIQrHnSe}4CL$A)NywyRGBP=t zf=o%KB2$xT$h2fSGCi4r%t&S;Gm}}!tYkLQoAe=l$?T*bnS;zp<|1>GdC0tEzQ6qa zch12<`N;xgL9!58m@GmTC5w^8$r5BqvXqN|UvyR)Dg%{;%0cC!3Q$F;5>y$g0#${o zLDiueP)(>7R2!-T)rIOo^`Qn(L&zU$1T}`5Kuw`$P;;mS)Dmh1wT9Y20WSS{kyLO{ zTe2M)NVX?~$PQ#jvJ)9hb|yo}E-wE4SnUdRgStaKpq@}Ks5jIH>I?OQ`a=VtfzTjm zFf;@j3JrsXLnEM(&?sm$GzJT>A5|8W1#-oJGzi z=a6&BdE|U@0lAP|L@p+mkW0yB?xOkVna5ZfD%H9pu|uTC@GW-N)DxfQbMVq)KD5I zEtC#Q4`qNdLYbh~&aJLCuDfO0~)pxjU%C@+)`$`2KQ3POdT!cY;Y zC{zq84wZmPLZzV6P#LH!R1PW+Re&l&m7vN{6{spy4XO^+fNDaupxRI!s4i3wst+}Q z8bbb1Bd9Ud1ZoO3gPKDvpq5Z8s5R6E3V_-|?Vv!YJro3WfI32*pkSyo6asaDxCg;lCNvA04b6e(y7cF%YjDs!az43$Tu3e=7n4iKrQ|YlIk|#d>EhqN zW?2QThSor9p>@!DXalqn+5~Newm@5eN0uimkQK>F zWM#4nS(U6tRwrwaHOX3JZL$tom#jzDCmWCrNq@2t*_doXHYJ;p&B+#IOR^Q&nruS` zkZs9!WFXm|4EoD|-jddV>_~PZgUQZh2-$_~N_Hc=lRe0uWG}Ke*@x^)_9Od~1IU5o zAaXD{gd9o^BZreC$dTkIax^)H97~QP$CDGtiR2`5GC75uN>2Mty!UZB?=#4m! 
zlRLrr|Z%!Y~>7zM)GN;ex^u?UMn$tIP`fg4?%;~2&{W2%F zNybNED06Z*r_ko)VNPMpDXeQU{D)-{54jc8pXS=Y$cHHvkOYF(pQ z*XY(YhINf;U1M3-*w!_Ub&cy<|Nf0}JSaYt07?iYf)YbXprlYTC^?h@N(rTcQbTE= zv`{)IJ(K~;2xWpYLs_7#P&UXL@_~Gz?2sRn1Ih{If^tK7pu8^q``wLvWPY*$S&%G5 z7AA|3MP2;!uCZc@;))WAl8RD_(uy*QvWjww@`?(Iii%2#%8DwAs)}lg>WUhQnu=PA z+KM`gx{7*=`icgMh6;a0BSm9H6Gc-+GevVn3q?ysD@AKX8%2Pkt)iVGP|;oyr0AgN zsOY2!R&-W`D7q-RD!M7UD|#q;Dtak;EBYwpIoCPP4Alt?LZyI@7w&vaYkO>m2Jk z*SgNLuJf(y0_(cax-PP=i>>Pt>$=psF0-!7t?LTwy3)F?vaYMG>l*92*1E29t$#jI z1_Z4qH;^02P2^^B3%QltMs6o}kUPm;OXOwp3VD^hMqVdxkT=O&i3FU%v zLwTUQP(CO>Q~)Xn6@m&wMWCWkF{n6H0xAiWf=WYWpt4Xos612wst8qrDnnJEs!%nk zI#dIy3DtsXLv^6KP(7$V)BtJ-`9qDM#!wTeDbx&V4z++ugFsFm&bjX|zo6`|+<%baeT(;aiVYfksf>ApEVFsFy+^vIkZ zo6{3>dTLJ3%;~u~y)dVj=Jd*(Uc070-;;Txc&m7)c(3@N_^9}#_^kM%_^SA(_^$Y& z_^J4%aGN6kJ)sotiqHxVMHod`ML0!xg{Q(x5kV1A5lInQ5k(PI5lsA2 zrix~Y=86`ImWo!2)`~WY07Y9xJ4K+Py&_1_LD5msNfE5*tO!wbQFK*wQ*>AKQ1n#v zQuJ2zQS?>xQ}kC1Pz+QIQVdoMQ4CcKQw&#(P>fWJQjAuNQH*ur@82MfgT_M>po!2V zXfiYfnhH&Wrb9EJnb0g~HZ%vC3(bS(LkpmV&?0Ctv;`qBbObsI9fOWTC!mwi zDd;qG209C!gU&-2po`EY=rVK#x(Z!`u0uDVo6s%jHgpHN3*Ce6Ll2;b&?D$E^aOee zJ%gS@FQAvuE9f=!26_vhhD=MQBh!-^$c$trGBcTl%t~e>y-6R^m&{K3kvYhmWG*r{ znTO0v<|Ffy1;~PAA+j)8ge*!HBa4$I$dY6!vNTzSEK8On%aawziex3SGFgSJN>(GQ zlQqbiWG%8bS%<7k)+6ha4akP1KiP7}^zVOfe_RJZ1EE3C zU}y+56dDE%hekjnp;6FiXbdzK8V8MsCO{LRNzi0y3N#g(22F=%Kr^9P&}?W9G#8o& z&4(613!z2OVrU7p6j}x?hgLuvy$0JZ_>`x9L2aDZ`O?Xu<}}QlhMUs} za~f$*qs(cvIgK%=vF0?+oW`5e1aq2bPLs@OvN=sLr>W*N&77v2(+qQ(X->1uX|_4d zF{ioaG|!yoo6`bwT4+v-%xSSXEitF1=CsV5mYdTGb6ROmtITP&Iju3LwdS*9+&78KI(++dmX->P$X}3A;aZP`|1P%_`OYS50lLyFyiuW9D?+oKBe2Npm`7PN&W3j5(b(r*r0X-kdI&(?xT-WKNgO>54gBHK%Ll zblse8nA1&jx@Atc&FPLg-8H9s=5*hj9+=ZZb9!V>kIm_cIXyL}XXf5DmiHK%Xp^xd3(nA1;l`ejaT(~Sp(P_F6E z<5NJ8I~khvAj6Ph$#7(N(v$QeBajivNMvL(3K^A*Mn)%NkTJeN0uimkQK>FWM#4n zS(U6tRwrwaHOX3JZL$tom#jzDCmWCrNq@2t*_doXHYJ;p&B+#IOR^Q&nruS`kZs9! 
zWFXm|3?e&_9m!5)Fxi<5A-j-W$!=tKvIp6d>_zq_`;dLfeq?`g06CBxL=Gm0kVDB~ z?xOkVna5N5&@;kO|2|WMVQ2nUqXMCMQ#nDall1YBCL(mP|*cCo_;4$xLKsG7FiN z%tm^XKBO<1o%ADfkU7a*WNtDKnU~B*<|hk~1<68WVX_EWlq^OTCrgke$x>u#vJ6?4 zEJv0nE07h*N@QiS3R#t`Mph?lkTuC#WNoqzS(mIw)+ZZ~4M~5p5!sk*LN+Cvkl1-X)3MXn~-kZZ|xnW8`u21bLD?MV=G02!?EHXA3hm1?cBjb|^$b@7fGBKHiOiCsrlandPlw>M0HJOG?OQs{! zlNrd2WF|5*nT5yr)0hNM5) zh-^$YA)AuT$mV1VvL)GyZ2g<~cWB%29zeDw+mV4}doqaZKz1ZMk-=nVGKB0xb|t%! z-N_zgPqG);o9sjOCHs;6$pPd*au7L~96}Byhmpg{5#&g66giq4LyjfKk>kk;+2)5#g+OmY@Eo18<=CFhaz$pz#>auK08pbrSC}J zmA)r^U;2UcL+MA-kENeTKb3wa{apHm^h@bi(yyi8NWYbSC;eXfgY-w~Ptu>Izes0i=rv;O_F5K7uzI<&NhbQtNd(&41TOM6OtNk@>5C>=>UvUC*bsM67- zqf5t-jwu~WI<|Bi>A2GIq~l8`kWMI_NIJ1}66vJU$)uA@r;tu5ok}{jbQAceUr1MJ`aJB1)==ga5 z{tQqMDg+gViaYo#9BKizgjzwZp*Bze)D~(71w!qiAgBY>5$Xg5 zL!F@zs0-8;>IQX(dO$s)UQlnS57Zax2la;rKm(yc&|qi?G!z;J4TnZRBcV~yXlM*H z78(bQhbBN1p-IqWXbLnHng&gWW zE1^};YG@6#7Fq|bhc-YPp-s?cXbZFz+6HZhc0fC!T`v83{v*9ddav|8>HX3Nqz_6T zl0GbbMEa=oG3n#dC!|kGpOQW;eMb7M^f~GC(ifyJN?($`EPX}#s`NGK>(V!*Z%W^i zzAb%6`mXdn>HE?Tq#sH@l71}xMEa@pGwJ8jFQi{ezmk3}{YLt&^gHSI(jTNhN`I36 zEd53LtMoVN@6tb{e@g$7cANe0pM_A;?$V*9J*2}(hm{WJ>c3xKhKD>MFDL>O5sCyw zhN3`Gp=eNaCrH0Z#X`ysb zdME>w5y}K*hO$6ep=^*hj}psDexX`+Qnax{`Ed=_=AyrK?F-m#!gQQ@WOPZRtAFb*1Y` z*OzYK>c5Xy4IzK15!4uJ0yTx2LCv8SP)n#4)Ea671-SIT--EW&?W6;x+e-&YcaZKV z-AOuFy0dhMbQkHa(%q!HOZSlODcwuDw{#!rzS8}q`%4dy9w7mlYq=!q7 zkRB;LN_w>P80oRnNaItCqwPCzH2Q_yMXj7$G}JUlCXPWrs`1?h{@m!vOC zUy;5leNFnh^bP5o(zm2cD}7J;zVrj>htiLvA4@-xek%P;`nmKA>6g;4q+d(F zk$x-vPWrv{2kDQ}pQJxaf06zw{Z0D2^bhHu(!ZqL=KT9H6H3}$I<&NhbQtNd(&41T zOM6OtNk@>5C>=>UvUC*bsM66~{rB-XIurwn3B`h9Lvf(EP&_C;lmJQyC4v$|NuZ=q zGAKEe0!j&`f>J|iptMjrC_R(`$_QnGGDBIQtWY+{8}fmCq3nN<(F!vQRmwJX8Uy2vu_Ff1iIUOIMMuDqT&w zx^xZcn$oqTYfIOWt}9(ny1sM+>4wt&(v74WOE-~jD&0)FxpWKZmeQ@HTT8c*4v=mu z-Okm2ALjz0_D~Si0qO{Kf`XyWPzclo>I!v(x(0q zGz1z74TFY5BV795pM{aqqohYmkC7fLJx+SO^aSaN(vzeoOHYxWDm_hly7Ua`nbNbQ zXG_nKo+~|1dcO1m>4nmZq!&vskzOjjOnSNW3h9;7tE5*;uaRCWy-s?)^akmT(wn3= 
zOK*|hD!om5yYvp}ozlCccT4Y)-YdONdcX7m>4VaTqz_9Ukv=MYO!~O=3F(v4r=(9y zpOHQ*eNOtk^abgQ(wC$!OJ9+`Dt%4*y7Ud{o6@(WZ%f~izAJrC`o8o7>4(yfq#sK^ zk$x)uO!~R>3+b2AucTi~zma|`{Z9J5^atsW(x0S1OMj96D*a9RyYvs~pVGgi-RA!L z7#m93-PM0TKZb@ppfFHaC>#_X@`Svg2v9^Q5)>JV0!4+QLD8WYP)sNm6dQ^I#f9QQ z@u37zLMRcG7)k;qg_1$Zp%hR`C>4|%N&}^Z(n0B=3{XZW6OQE~6jT~21C@o!LFJ(eP(`Q` zR2i!BTmSn9`u=%sVO3nKLDiueP)(>7R2!-T)rIOo^`Qn(L&zU$1T}`5Kuw`$P;;mS z)Dmh1wT9Y20Z?109TW()hk~FEP)Dc}6byBC$=&Ur_Y{OkcaiQY-A%f?bPwsC(!Hd6 zOZSoPE8S1Jzw`jvp?%PP=m2yO zIs_etjzCADW6*Kv1auNQ1)YY@Kxd(I(0S+rbP>7)U52heSD|arb?63k6S@W6hVDRj zp?lDM=mGQ)dgPM(|2-Z&mVP4rRQj3pbLkh-FQs2ezm|R@{Z{&&^n2+K(jTQiNq?69 zBK=kRoAh_-AJRXie@VN|`}fx>l(f5aXlW1WFw$YA!%2si_LTOLjvyUTI+Ao`=_s!L z`*;u)iUvi8Vn8vWSWs*z4ip!P2gQdHKnbBlP+}+vloUz^C5KW#DWOzQYA6kq7D@-D zhcZAJp-fO_C<~Mo$_9BuK9Dbz9rA;6xa9u(@xaeFD5rET>DMO^Lr`1SMs{r{*aR17K(m4He@rJ&MK8K^8&4k`~-fGR?jpvq7cs47$qst(nF zYC^T3+E5*+E>sVy4>f=qLjF)As4>(8Y6>-jnnNw1mQX9GHPi+QfZ9Uspg^cS6a;mE zIzpYGV5l<`0(F79LfxS5P!Fgl)C=kj^?~|A{hZN0g2v9a%bxbX4hR($S@3NXPuszrWUuB^_Hjj&xkP`RMm&q-0QXCK5 z`9j$tKPU&36UqhUhVnppp?pw&r~p(DDg+gViaYo#9BKizgjzwZ zp*Bze)D~(71w!qiAgBY>5$Xg5L!F@zs0-8;>IQX(dO$s)UQlnS57Zax2la;rKm(yc z&|qi?G!z;J4TnZRBcV~yXlM*H78(bQhbBN1p-IqWXbLnHng&gWWE1^};YG@6#7Fq|bhc-YPp-s?cXp2k#d$zk(dYkli z=^fHLrFTj1mfj=1S9+iHe(3|!2c-{5AC^8MeN_6G^l|AE(kG=)NuQQJBYjr-ob-9= z3(^;*FG*jPz9M~9`kM50=^N5FrEf{!mcAo>SNfjxed!0%52YVTKbC$X{Z#sy^mFMK z(l4c7NxznUBmGwTo%DO@57Hl{KS_U<{v!QV`kVB3=^xTRrGH7gE%^7(R48e8>Cn<1 z(qW{-N{5pUFYPJqB^^OJqI4wb$kI`yqe@4UjxHTTI;M0i>Dbb7q~l7*la4Q)Ksupx zBI(4^Nu-lXCzDPtokBXLbSml8(rKjAN~e=fFP%X;qjVI;V6l>DMWl;L7n3e7T|&B~bSdf5(q*K}N|%!^ zFI_>pqI4zc%FDJcc=%{6Y2%^hWbE# zp?*+*XaF=28Uzi7hCoB1VbE}B1T+#F1&xNrKx3hC(0FJ9G!dEvO@^jGQ=w_lbZ7=N z6Pg9hhUP$Xp?T1JXaTekS_CbImOx9PWzceH1+)@c1+9kGKx?6O(0XVCv=Q0_ZHBf$ zTcK^xc4!B*6WRsshW0>vp?%PP=m2yOIs_etj{MO-ucbZ;9fOWTC!mwiDd;qG209C! 
zgU&-2po`EY=rVK#x(Z!`u0uDVo6s%jHgpHN3*Ce6Ll2;b&?D$E^aOeeJ%gS@FQAvu zE9f=!26_vGjeZq&G@$lHM%6MS83BHtFrsJEV6?@A}if zU+1%1dXMy8>3!1sr4L9Sls+VVSo(iu6_KYtq-HZ%E&iz9oHI`i}Hn>3e_r_p6}pOFxi)DE&zKvGfz^r_#?{?fRSB zf1Y7Khh9K0p;ypr=neE1dI!CSK0qI#Pta%R3-lHG27QNqKtG{hklP~tRSE^UL!luL zC=3)93I~OUJRvVA0u&L71Vx6TKvAJ+P;@8;6cdUC#fIWQaiMrnd?*2w5K06khLS)@ zp=3~UCK5`9j$tKPU&36UqhUhVnpp z|LC7*Wci@{PywhQR0t{z6@iLE#h~I)38`JvE?q;qrgSao+R}BT>q^&?t}op{x}mhcbR+4;(oLkBN;i{kF5N=9rF1Ll z*3xaH1EkwZx04Q(ZZ91q-9fsebSLRx>CVz2(p{vxN_Ug)F5N@Ar*tpr-qL-f`%3qd z?k_z+dZ6?m>A})Nq=!lmlO8TTLVBe1DCyDCW2DDQkCPrRJwbY+^d#xY(o>|TN>7uX zEBZ7Zq?bxBlU^>pLVBh2D(ThIYoymouajOc zy+L}T^d{-e(p#jrN^g_iF1BG`Tq>oA;lRhqe zLi(iiDOdmfO8Yc)209C!gU&-2po`EY=rVK#x(Z!`u0uDVo6s%jHgpHN3*Ce6Ll2;b z&?D$E^aOeeJ%gS@FQAvuE9f=!26_vgTh0ekQWpIiU>u5B12K2s8BR0Iurwn3B`h9Lvf(EP&_C;lmJQy zC4v$|NuZ=qGAKEe0!j&`f>J|iptMjrC_R(`$_QnGGDBIQtWY+{8}fmCq3n=w{~yQkGD3uukwh9aNKqk*N{X@~JE_E{Wt@tP?3KOu-h1!8_uhN&z4!Q? zqnq#ZeOWgY1s-dVxqQXTr7S%*lgs7&Xnu%&Is)eYQqFRY+Ehjl@LG5s zydK^FZ-h6&o8c|+R(Kn{9o_-&gm=Na;XUwPcptnUJ^&wt55b4wBk)o97+04UxY8gm*Fe$Rrnfw9linIgm1yO;XCkM_#S*8egHp&AHk2|Cx5Mf zR_H1G41Ny3fM3F|;MedQ_$~Yneh+_uKf<5j&+r%cEBp=q4*!6E!oT3(u+dWOYq9?R zF^03j+2I^;PS^y_1?PtIz@~6sI3H{V=Z6cx1z~fz5NrWk!iC`?a8bAzYy}sGt>F@| z4O|i~1>3@GH_Yg33i5EU{|;tTpo6VE5Pos2V4=Z1bf0>us2*8t^!y6 zYyC6wK5#X-I_wMk!8PETus<9C2g0@BAh%$G;hHxV|9BvFZ zfg|9ia5K0$+yZV1w}M;4k#HM03T_KW!!d9y+zyU|+ru4T1;@h)a3Y)pC&L}#PH<$9;MwpTcrH8-o)0g87s89+#qbh% zDZC6`4zGY$!mHrb@EUk6ybfLuZ-6(#o8Zmx7I-VX4c-p#fOo>X;N9>ZcrUyU-VYyu z55kAw!|)OKD0~b)4xfNe!l&TV@EQ0ld=5SjUw|*dm*C6r75FNA4ZaTFfN#RL;M?#W z_%3`8z7IcuAHt8|$M6&QDf|q64!?k3!mr@h@EiCo{0@E(e}F&2pWx5%7x*ju4gL=Q zfPccj;NP&(vj6@q&KS-HXNPmZIbjnx7n~c;1DnEm;e4La+sF2^WTo zz(wI=u$968Ub|gfl(nc5qHIK!6je%;t*FwX>_pk?i?7*m5bt#qRYp`A*!jUW}=#lY9Xqn zs8*s{i;5K0MpTrjwxXg%#fXX()lO8LsP>{dh*F~BMJ0$z6qO_@SyV?+okVpO)kTy} zREnrnQE8&Ois~jRT~v2bJw){s)k{={sNSObi0Uha4 zSJXUF^F=KXwNTU|QHw<_5w-Mh{rMUHWulggS|Mtss8ym?i&`UUt*CXP){EL8YNM!4 zqBe`#B5JFsZKAdtN_!p84tOWL3*HUyf%n4u;QjCc_#k`;J`5j$kHW{`pTf`J 
z=kN>oCHxA04Znfk!tdbs@CW!K{0aUHe}TWk-{9}?5BMki3;qooE&uQLjxn4K&JO2* zbHXNYE;u)w2R4QC!uen`I6qtfE(n{$g!yZQzn{DcBY+ z4co!?umkJ}mx0T|POvlV0=vTH;PS8=Tmg26J>ZIPCD;@8g1zC&a22>J>;qSWtHZvq zA6x^j3H!qVa3EX@4uWgL!Egv%2d)c;!eMYdxIWwfZU{Gm!{Nqo6F35H3O9qB!!6*J za4Wbq90|99qu{o1G#mrR!tLNVxINqfR)4L3<}DshfD_>)I2rB;cY-?`{O>2eyNJ?> zN)eSRDos>ZQQbtPi|Q__hp3*SdWp&q)mu~_QGG@A6V+eT08s-)4H7k2)DTfaMP-T_ zCTh5-5u!$l8YOD9s4=3(iW(DHCW@LQYO<&)qNa+PCThB<8KP#2nk8zss5zqM zikc^CzNiJF7K&OVYO$y#hSI(lUkWdSm%}UImGCNfHM|C13$KIM!yDj@@FsXOyanD0 zZ-ckPJK&x0E_gS*2i^pTjTUm+&k2HT(vC z3%`Tk!yn*}@F(~){006Be}linKj5G6FZefXwBo-%AB^E_aCSHcoD(*IbHTacJg_O8 z7tRNp!TI3=a6#A{E(BY^mT+OX2wW5{23x_!VQaVqYy+2sOTo5qY1j_7haF%?xC~qt zc7mN@7uXdp2bYK4;0mxi>;YGVE5V+y7wipJhO59;VIQ~}Tpjj>{oopKP1qj}fCJ%L za1dM@4u(VEI&fV$6b^&y!S&$=a6`Be91b^zo4^rpQ@9!29Bu)(gj>O_;Yhd*90j+9 zqv04h7H$W}!R_G=u!7^^1UL~+f|KEna3{Dk+y&ObDR3&B26u(K!Rc^!xCh)5?geMS zz2QD^U$`IKA07Y?ga^Tc;UVx)I1?TQ4~IveaRgloY;aBVmk4uR{yb>UDr46X;)ha12R;YM&c+!$^GN5D6#D1-FJH z;Wls-+!l_8W8hf09UKR@hdaOuj)xQAL^ugfhC9NY;LdOtSO=%Tsc;(H748P7!`y_4--K_$x8XbRUHBe+AASHogdf3=;V1A@ z_!;~hegVIPU%{{8H}G5d9sC~t0Dpu(!JpwT@K^X7{2l%Q|Ac?RzhR?Q`2HWx24{zJ zz&T+PI2W87&I6mmdG*%60zrG0G9TV!2Iq$hzy)D*xDadsTf&9mB5+Z-7;FU>hppif zunk-iE(P1drC~eR9(I5o;WBVp*a>!qU0_$Z99$lDgDb%9um@Zbt^|9+Ua&V@8Lk3X zg?->^aCO)h_JeD{HDP}^01kv}!9j3sI2aCr>%ev4P&f>(2iJ!izzyL>a5&r;ZURTZ zP2py6bGQZE5^e>zh9luNa1`7Yj)r64ShyV=2e;QdE;S}DQrAH*CuO2?W$4r&=2509 zZ>Z6r2;-O`;ktOk+d_>)od>D>nUT7LKZlf1Hmam5rEFDcWvA?wgK|`5R9WSuoRy1m zRpnH9<)++~hpMP5DNp64yj5jYMO9Tks+y{g?)2~E&j07+F51avStslM98yr3t3t{`S*pUS zh$^azDJxZ6S*xJ`E-6KSU|fbSRX-6X?rt8QZXUWcePrmm8h(k09MS1+$Ef+{qnuQTq3$=z1EC<-Od?a6s0*9G2N6#MsCvpZ^w6)OIQrp7X7SXn-w#Bry(zdv^*4mcP)<)Zs+S+PcT3b79 z?X`8#)=}Fs+LqPUNn2-aU9@%8ww$)*wRO|Bg0}A3dT3iw+e+GcYU{0SYD|Pl*XX1) z{UelNxYrw`W$1bv?(GD_gAf}P6_Xa9lp5bLD#|n^DLyvNRM$uU_RvzHwxP!QPyfF9 zNceMNBmIf}{+%cuz5a$b>mNP+CstTygwj5;k^WKqb2EIYHr%pjjgjF8?hLn_Sz~0P hZ7ywdYnw+~Q*HBV>!odKOxBf!r^ThEq{hU!{STzZ3q$|_ literal 581880 
zcmeF)1)Nn?_y6%Bl^9wSrMpB@K-7C^5D9@1FlZE%8bZY`KvWb(#P06y?(S|$l}?oy z{&Vl%Ynad4zw;k>Kv7x#Ua#MJ-s|jh&b|BY^F8Oz%;R%*{$b;Xj2%8?VxwW>CX8rw z+=L-xMilKZWYUmMssEIjuiiPC95QO`q!AOw4jDbMXuHwlhMm}b$k-vrkC;$2vq1L9 zf_0D0EL0aq95r_Mh*OFV8av|TL0wb-F|+XhvE6Ult+-jo_+qzBE!Z`9y5N5 zdtGBDkDfGY$b<<)rW74CVZ^X;V<%3UFnQRdqM1eAsG`|#A+7IS^wwcv~Ui-`%*t2`~v1(#ekL*3$W!4IM?K5j*&mP%(*2&l_*ExH&?a;S(+dTvw6@>PE89; zO>%6XE*YIVWlzn4!T997w8TrwUL&pWF&x>iX=ZEHX_k4A>U7U+qdGk@4@Re7=RTQ- z;GJc6nq(fTTTL?)y45T*;-Ru%bz2VV)wfAzJKbuU*CYGCn(5MoklvAc)g4)6WYLjZ z!>C(!7j7V)*Huf=>V?rXZQC44REYbjq#`&!1=vc8t{buVAb`?|NU`}kVH*L{86 z&)15+R`Rv7uT^}l>T5M$tNSWbQ`5VaueE)x<7-`C>-k#W*9N{e^mTt<8~J*GuZ?|e z;%if1oB7(@*A~7W=xa+~Tlw1B*Mod*!uV2w#u$^>|-L`Z~(j6MQ|<*U`R?@pY`P<9r?OYo@Og ze4Xg)Bwr`{dXldv`+AD6Q+z$u*VBAG-PbdGJ=52-e3fF?znclW{@py-_3!4vu75WV zcK>(tVE2DF4|4x^N=vi*hnoqzf4F(r*89huJ!ajfd{F5`VyF5?sZtHz_d%yPcX7`EP&zs#m4E7hm5MTePKX3Lg zZ@0-|yH4FFZr7=shr#{=uXpFXdmrzxDUmHh*pg{@0&(*!be#M_P_Otu2zpw6%~v&Y zAFI_}GY^gZ{9ASN|98(r%i85tpmFbEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zH zc2y3@?~!*^-PONOmfvIUs=BM@UCwLs@9sY`AAfh<)${*%zh8a6UH$%}$bJ61s_Oap zyKVEodtd*%>bb+t>(-skgUv&725cSJIC1WIs+=dT%6a0d zoF}fvomcvC=aqikd8HqBUg^tu;;Nh{uF84hs+=dT#+_ICap#qO+F=uN4m+=X@3HSa_Pxh)z;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKH zz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKHz;eKH z;IGMn#*sU&)F{TfFXN@^?Qzz9y}#ZbXX)#zkC&JA{&AM$NR@G7UG;Xc)c@QW0 zBbF+CsnXA@#*LT$w$`}w$o_h}?&oE_zgX`leW|its`TS(+&Jk=mHqRw9;e64cB!&m zSG`>@QW0BbF+CsnXA@ z#*LT$w$`}w$o_h}?&oE_zgX`leW|its`TS(+&Jk=mHqRw9;e64cB!)6o>Tss=M<^{*PQkGr!iuYX%>-1)b4e?3l)Cw;N5+q+$l&&&FJ+Hw0! 
ze|xJOU-pahw)W*XQe{7>(w8dzxGLkM>Tz*>Jx*Kp(^bYvUt9W8WxK9=yIAU9HC`Wg zXIozXw$`}wZ|nYgoE%U3VqLd)yB?pH_4%~p_Lu(lRyn@x7w2v5%WF0HP87EaAM_b0paiz*Qu~ZqatBjW_h3^%KlQNFUJwfcB#^rD&uvP@zR&^y2^GvUiwmHyVRY1f41YLmY@Bd z#91~E$r-Tg!PbGT1G^3^2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H z2P_Bv>p395hs>+W?HTFtsj@waTz-kA%DB9$9xv84+DYfx z*5k@JdHrIkk$*k1U8-!4v-D+usj{C`87G!1{Yaf(wo8?9;<&o4<7EFRkIzBINtNU4 zD#wu;H(vTuWxO0u`ckDYRrkeGqdewO#_1~i>3*DLynbDAbZ8>{)<*8Ss-C&$&>^?78RR2eVl7t40pUo2I| z$$nCGUt905EyvM)Z5b!WZ|vtYuW_=UR6SmfCw-~Xmn!>~O87Ect&#UU=h-E*q99OFJr8ZXc8@KdW@Hb}U{o)+gm*Ys) zab!Q)F5{&yj@vGM8821#(^bYxU&hB(8K?Ktec3Kmj;Aes zUE_`?b6j1x9mE*{Ev92;cuF815UHVdGdz>RR4>Dd?*mA;JEec3Km#>G|HU$)DB zaaE5KOO^e!*n?mF;nk)I7*|U1fh6 zFID<7UiW3YR2e6AXWyUgc<%mV^I-FkoB>-0whn9^*mYnzU^!qpU^!qpU^!qpU^!qp zU^!qpU^!qpU^!qpU^!qpU^!qpU^(z#&w;^_yUu(0DwZn0m(<%MKVDZoPL3nv^mcJx zRgR;p-Y>81GCuD0=>27!RN23=djGPYY?mtI#8PGawpQ6M?sdpJx=<% z>TyxbdXd*D`-`PYUsu`QSRGgTQsum2Iga$jy6Wv>IgV7>PpoU)b~&DG*Hw-a*Vo&{ z`nb~9$7$^6O>dX|b=BkKcrs3J7w1*wIJ)Zn^4c!r<6e*6U&cw5{TthLnwI=W&zH=P z^u{I%gu9%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&) z%K^&)%K^&)%K^&)%K^&)%Yj{=1M=^sk(w5{bt$!!w^aT2HuCp6vcFW>U#jdcRgNR$ zrOJ4zGG40eCsp>7D*H*5{iIs%xX;(-!R8@31GWxq9oRas>%elra=>!Ha=>!Ha=>!H za=>!Ha=>!Ha=>!Ha=>!Ha=>!Ha=>!Ha=>!Ha=>z6YYxcoq5o^Oapczdf2~ITef?AF ze{J2!`}wb}8+kwflyxJo!|ul}r)(Z<9+ER)>%i85tpmFbEC(zHEC(zHEC(zHEC(zH zEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC+Vq9FV_P*wHF~AF!iUey_iybw}Ti z?7yS$NA};*_apnu>)O#OuWLu^j=m1re@9=3>>oug&v&%S>)O#OuWLu^j=mq+e@EYs z?7yS$NA|bp4}1Qw=a1d@`NOV%yZ-I^x9i_>z;eKHz;eKHz;eKHz;eKHz;eKHz;eKH zz;eKHz;eKHz;eKHz;eKHz;eKHU~3M@|L%*fX^~qex|Z^NUG@HAy}wxRFP8mumHl*; z{dCp)i}n6uy}wxY(^dA`-}DdV%bku*-uy5 zPglLaSnn^^`-|;<>~hxT!R8@31GWxq9oRas>%elra=>!Ha=>!Ha=>!Ha=>!Ha=>!H za=>!Ha=>!Ha=>!Ha=>!Ha=>!Ha=>z6_ss$Qdw2Q0y7gTy*gV)gBxk^`3%3q-_B`10 zhnrWM2b+iF4A?rbbztkjt^>;f%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&) z%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%K^&)%YmJl18z3c(sKTL z5zcT@pP5rXcGPmdxZ_Aaud0uu$LsCddc0VV6HC?O 
zbYHgXD&utB-f^=$R<*W>keu~a=?TaVM*#Zu$O>+NylbU!c4aiz+5sk*N%E~7TarAh-U0aVA>v3YKdYtadc3owhuG>3K_Lu$Q ztowSr-Y%A^$7}0xdb?O^+<3h`Zk+Cm?fJ~jpv{BLLvjXe9oRasbzs+l<$&dY<$&dY z<$&dY<$&dY<$&dY<$&dY<$&dY<$&dY<$&dY<$&dY<-qpkfE%6n-|y&g^7~4uaoeT; zzt)|7d^w&}{kn8tELFyfcXrjs*VgBg?NZ~mOJ9!@OV#6~FIBe3RXt8DRqrSLZLN9j zpVxLd&$f<>8=u$ydb?O3Ppqqqmnz$}rLU`u(^baB^=13^Rym&RCzdMxZLPAORK1_> zi>1nV@y@RL_}cn>vR!K2cIoSJVySwZ^rg!7xT?pArRx2pzpXW|{qx!`=h@bAapUva zUvC%d_STW^Kgo7oWn5f;XKk0`=&H}V zt(N2H@zU2dZk+UGe4KS(#!J=PwPl=C*{-W>*Hw?xmhsZxR?9f4vcFW_7fY3KQl+n} zY?rFHYs)yPvi*Ox>?bwuc+!`;t>g8$?cHCGm*Yu|+b(^nGG40mrAl9_^z*7RetWAN zSMMi%UE{_{U&hB-_hr0PyC1SruCiTf+;-{9IH}T)t1?ci9v9b_adF$FudD1QRkn+zN?%vKT`X0` zi+8lj@ubEbZ(Fy^IH~%0(wCaob{Qu%uko^7j~7eT6|<8)uPZ*R@(IC{HS zSAE{N?b4U=VqIms)VS@^mvK_1A6I3ZR6Q=PFXQ62OJ7&nPpWJeOO?K^db?Pvj2G`{ zmE%c`JKnZ#mvK_{@uV*`ukA8UYF^`IyB;r=s>j*$hno?b2b+iF4A?rbbztkjt^>;f z%K^&)%K^&)%K^&)%K^&)%K^&)%Yhxufl`sXj{d2&s(*d|l-k(8p1-U9Q$H7ZUAy{o z`Kv#N9lgG6f0r*d4>k|U8L)L=>%i85T?du}mIIapmIIapmIIapmIIapmIIapmIIap zmIIapmIIapmIIapmIIapmIIapmIIap|91|!?$-Y=|Ghe?TerCXBz>v7+xEZv{r=tO zlh0M^?tXsD4YvYp9&8?xGhpk$)`6`9yACV|EC(zHEC(zHEC(zHEC(zHEC(zHEC(zH zEC(zHEC(zHEC(zHEC(zHEC(zHEC(zH{+b+cM-Bh?BJ|&Hm}RD1wfWdR=EHKweaa^ASAUr*e5q#t)4>BpT%`ucp@js52zSLJoYRe2q8RbEG2)vrf8?!3~E zJFoQP&MSS(oxdh`Y+h|%lQUrJz}A7S1G^3^2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H z2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H2P_9H2P_Bj$^rd%K=OAz@^?hG-Q|e>dnlV9 zH!rq6{u}EE+IiUZZS^j$s(ih8r4-#Nwgch|pvlKGHo^XKwoTi1)_t>x|Rl()9tZN1xi zx9=I21C|4p1C|4p1C|4p1C|4p1C|4p1C|4p1C|4p1Ako($j_za_Z3p*=k{XxdA^L7 zD&xdbWxTF3ep`<#PUIgX5zzF2D9IO$83@lxe@RsH*yD&zC2a-6)z$#xkp zRr+#Xc|B5PoUXE6_Scs2vRzxoS5@y@#z|lHm+?|%oLH)i*Hy-E>v3hg97kK9Pj8p~ z^f)=cjMt9aPsYo3sdD_d?J`cPjMp`<{bhSqyG~Oem8K@+HaecjA=2QAoWqQB?Yhc1-M8nD)H1QZ z+pxdiNi77s9_)HZ&Va20TL-od>^iUh(s?S}{yQ@BTIq$Ce+~vHMt1ka+9&8?x zGhpk$)`6`9yACV|EC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zHEC(zH zEC(zHEC(zHEC(zHEC>GX9Ec)!-EC`?zq{JjI@lk7TdVwC)3(;L{qeW8M!VVj{kzwb 
zozGoQHV-xr$r-S9VC%rv!GGsE$m_W$ujib+o@?@Yj@i}cw>+~vvpoCn2EB(0hN?*|Ibbfd2cWy#76q^6zF;jyXL+-G<&Ayb z_IW2~z}A7S16v1n9as)n4pZNJ3rANHDE;|D|>70CMy#q_J8F16?ZO5B+pj#%lSFy zcdut<;>Vsd=Z-D2IOl!1@yiq6{(0sJ&rMpDbN+UN7k*Uz+l7hb(cb8`A~AcPf=8Cv ze|h5Fn>%K|lm&^@+iWx6zpv$q{HYhRBIo!$hL-Bz`mqIx{ZemaS)xn!Y&_6;RpPO? zPI}?SeHSO%q+ZCPM77j}E=}B)e7U!-&Y3?quZt6vlGD?3S>l?jlm9GCR7k#zM;0bt zubSL(VWMyLti4usW#X!_WzKu)ou3mgzmt4{3;n#!&zU!w4>#|N63I`k!SqFm=O!iJ z*>e9r=OQ<%wR&X*+L0A~pR37bTMK?yZv+`Oj@>&hg}Z zC68YI>qQAyS0<(|IpV3xQ|J5hEKZDByz%Z;pDauqmKv}uaYy#Mcxb``zaHi%GE)m? zVWM~TUQ3Qxnpm6s_-ilp^E*H1^_-UaV3+31&&jD5yuh!E`H9prd|*jpdG@Lq_{_qD zY_F4As0;jhTAo<_N%!neZ0r2GbvHk;zHV~CF3nj_scCp=UXEw9O)iurIj_sDgZYWn z^xZz+uj3_&P1*0Z`_Ki6#PE={=OEY4Zq*^4=Qi7iSrOZ~I; z^{1w-yL@ho{PBLvnGd&)=On|(uX7;;jJ9u3p^>ICw<*)y&#QX0h|JmfnuTQu< z%St3y^~||h{`20LI3smMulMti7P;#(%U|c46RG9)!kUDe|1AHxY)VW?P1GizE9(<( zJ!JXMXN!Mbn-d>bPtMGGf8B3Rxa%{^&(lV~Ue+a2an@i<&T-wk+~kkD)_<;B63JA% zdsSNGKF2>3?sHk6sFe!Be3838vJx&wHv7+iyoYyU{>(k`2ZN14Kf3?5fvV2Z#@z>o(zpno9uTS3hW}g!q6Unsu^UU== zSGM@;M&^BE;`3y@?zui^eq4@i_Q&0jvp(E)nijp9eYd#umCZgMH~6~Vf6n=$WO}@C z}^?9+>->;VV`{4?|-zxt( ztVp=yFZ1hbwSV8Ma_;-?^I75VhszVG+vZy<{ks3dUw2CrZk?~pxlg+L->QULN6T~W zuP#@XCsL0AZ>;lq@rQpdSe{7TE+1N+Gv6+^R^{A3+;hUBocpu8&gc2hVWI#07vO-T&t09N*>GVxNam%AonME{&C}f$$1@e zUw6+_^L_5E@~>-Q&i&b~N4alry`Rf{UG7`%d2X@4u2$tdS4~X4fQA0`&r75plm1+k zb6=PHjJu!BPb8mKu3xd(uZOhAJ;!bGIkP$EI(PG)7Ns6px%Q?%$hp@=dNI zx4yO{Zc09;6yB1P&+c_@$+;i7>u-yHom>2UGb`tQP2!;_f-({Zf%zAH^bfJ(h~x^V@QNzm$AimXp8kKDaFB`OxLd zvYh8}mtRFAcO4Xo-1S+|AHSe~?kN?ej&n!lD0Mr(rc{*r{xPv~)Pe=KJTxUwp1C_pevJA1w8Gx57Uc7K_~dxmYC6m2N|&$UVzn;~qXG?wl$o-~)d(N5HI7)pVJgtHIJeD<% z-19(jw=No$j$FPLk5bp|;iV&YKTC_;^J|v>p11Y6)O}Ab>*oE6vXT3qS2jw0E|-^! z+QeR^V?h(1~r$r<8bA_UjyT2Cq>%Bze?w{#?dUgS8% z{rgP!_qTLEAL%*k#I57>$URS%^y{L8o7Y>@-Mm&W;d8G<+2u>SDmlm5ko+;#>`HE)&aLP_-%3#`PkUE#Ieb(NzuqcE?)jif&ihZ@ zzq-`&*Soy0D*pcAU+;Id-TFVfieCp+{CrmS^HI^|-Ln;a{#0`JsiQ0T&$+r=XOCBk z+;dE2H?B@)H(y^>ch7SNRd(z02f1HVa`%m2Yr1*7zjEZB%PL3i=Yy5}eW%G@ zRocJbGXC={96Z^OZj;%<*&;! 
zZoSVb=|8vP{&~8nzwR^L=QO^HyYBYw?4B>~pB<&X=Y81Az28F9qty4^S5J2Lt^O0E z)Q_X5w{`36n}l0OS@lx$@YBn4-1Rtosyknc&MxOZo9^ygRa-}??^ELrai7P81Kf4E zp|88nZadlK#T5;s)MM*~=eYOTsX=NTe0soaw?4jUmPWKoA(>fckAGk?(Xw^tX*n8FSu@w`#d*xcCUA6 zTX+Ag(#(C1$4747>U4DdM8dtErZe38UU;6H$8S$f-B+(%Feh?9PHpI}%K_cob@g^% zcir53p<6F~4s?0dEW_PjKWrSOey;uUcz2v{ZgbBghxT&oqi0+9^Q{XL?)83qs9Rqz zpX=7?`xD&#{oda0bFbgreQ#WTwVT)W=S8XKy!YC;JUXLIl=_~K)yUmfUhC-A_si|v zeQn}-F8A}bcI)EJmTp~Kb7*RvUq13?cYT&T$L-g)otx)EP2Kxl)zrPtA4j|UMOt_F zzMt*u@@xJ@{<>)9Ue`lo+`LY1F*2OWbwZ{ro8P-22k`E;sHxA@#bSK5MSe zzZULwZ<^-b_o##2^`1Y&t+zLNx$CAy2Y3ATUETTL=;-ddZy)BKi(0pI*X3QuM5&+a zJlVmm(^IbW_pSQwdFP$_ZoNExk(J%z06)bAf=T;S&Kj?S(>?J~EX3eR!R9eW*-I)AHrv)pxmeh0sPdb)X9 z-Oc6gtP9=q*Dd@bKSb>Gs3-&lX|)1d~~C`UM?NxpF=WI`&AfznS1Viw~MleDrJwH^Mo4W2QHJp`N7x`Yk++F8;3{0K(%9n0&dEBX+ z%Z)z|ICZsdMkXmd%i#62)B-Jp5b!s_nGeg-RUy-dP~o7pL35+?zksi z=&rA6?)6^S>Q;B&9&_FGl$|&3`M0CXf$m-1`Hwl%9j9!vyXT&=7rDIb*T=1|S+m^wy=tbrzTdjUt()6Ny6+W*E_Um0@D=X5>Nv+; z$73%@T^~2qxy6k;ZKlhKio@M{yx+Z#tM<9weGj_7r+a?+Xq11BTH}9@w#NN@W_>9) zZ--R#&mZgj&tKO1@4uzodDd6;_y08s_k6I{{oL&A((XEcu&Vz)wATOJ=8r_``@nap z-&6mzWy^}r?s+Sp|2c>}?^km7`7`$S&n@}f`YN36)_IXiZe4xb*yZ-(GeNX7i~ zX#xLyxjFgzQu7p7-Mpo_-07At=eb*+SKYdh-+Sfr&rJFtL%UM}GBwtNBq{WqWg zK9|q0!~FiZ`Tg@@KL1>s&t0ESm-FB6i}-Ox-2L#lBFWD=`=71P(eCvY@Zal-y8G2* z1zpa}&F8-Fx31xzGt0X?`lh`9-nN&2zRvI0SwYudlFxsB<^AvH%J}b5C8O+<|9|H% z^*p+GUw8kytDb*OtLdKq<~8#_f2iiJ=buXZ=bNT(9^b9$zb_PZxpqT&m&fN<@UOq2 z|9MX}_j!KYIC8&Nsp7t8b}8-h?}VoQzF)&V4@@ua-)~v}eYcjI_oWTp`HNR^Ido`4 zcmF6~)xVAsZeAa2=D+{dbl2zcrQQ3OQqFxonRWc@tmUuss{VD=_dhqNmYlb=w6n`5 z>;HHDQa>+xzLcA<{1x2yxDqw|`aQtsR1N>Vxtja@UTNlXVMJXwkHxCH=e#Ey`t?@E zt-mMh`s=8s`}xHsjor_a3VAQD?sEH+7VdmU)OGi#+p4+!o^I}*??0*FexB2+iGQE6 ze`ELafy|ojIXkNEa{unuetsJH=e+vv`~%y&*Z2H>?mB(DrvLs}%iRa|Y~`QpYy0EY z$#ecD4INo@WU+2(Q#0}no!&e1h`QbKb<5ZNl!DVn9zLV*OEuEcs%C!z$$zS*{=Ze5 z2e%Mx9(Ip;xT&~%pH^)ik~3iIVE0-FRo&aQ`@hXYY6iA42g=6#Zzs?GWTQ@cj|E_u1)p@t8*UPSc|Nr*$@9NL*-+umo z`{%!_U;n@T^WW94U;Tkk@8lo&q^JJCr^Jk7GyBx-mS+EFGhj1dGY~%m{r5~uOKaMw 
zaq6GUzGMb@B-xMbPYxiDB9A7IA&(^ol7q;>#_PH9MsB>B8*k;t+qv;hZoHct@8!n(x$!}6e3%;_<;KUk@kwrc z8h>Z}Yq#(+{*3&b{DS=AVK3_3-EPSK!KEa(&uI_W{DM9?W2bV>!C(m|(8&?y^q$_1Uh zf=>CMvv<(hC+JiNI{OBl{en)#pi?R6R1P{-f=<<-Q!VIJ4>~o1PR*cGE9le?I(33h z-Jnx1=+qB74T4U?ptFC_X%uu02s({}PLrV1H0U%7I?aPli=cC0&}kWTS_PffLFb^L z(*-av$Zs${ET>D)&?FuRK8cDCMJ-k5N8Wd7$zj<-y8Bl!q!0Qy#87 zLisr5%43wrDvwhhubinoL3yI`B<0D7JmH0)^J36> zDd@Z$bY2NMuLhmhg3jwf=Z&EAX3%*n=)4_t-U&MI2A%hU&ilzuuIG&pls{DdNcm&s zPn17Z{!ICEuIbFGga!KV<%B7XdD3?_(r@WVPdF8#8_ff8(ysz?p$`zF>DOXmmqFhzE znsRmJ8p<`5Ybn=OuA^L6xt?-;7Ic~iofbjo zz@XDI=(Gwtt%J@%L8nd7IXLJX5_Ap?I*FhY1)a7*r(Mu#A9OkdosL1LQ_$%gbh-qc zu0iLppwlhr93FJK2b~^4r)SXV6?BdWI=zEVpP=wu{2x$^W#<$lWjl?NyvrF^vV zG0Mj(4^$qcJXm>%@=)br%EOgMC?BVMyz)rpQOYMMpQt=qd5rQ{<#Ed6l{1wmC{I+L zq&!*qB;}KpPf?zte5&$k%BL%zk?p_sw}f_dn*o~vn*p1F|78aFtb1lMz47#WR?s;+ z=$sRD&J8-}1)cMQ&eWhYE$B=SIu`_;8A0d5pmR~sxj5*|3_6ztolAqxte|sQ(78P5 z%nmwN1f4lS=gOcnH|Sgybgm9M*94twgU)q9=lY;?L(sV~=-d=^ZVo!P1f5%h&TT>G z_Mme|(77|{+!b{04m$S)oqL1MeL?5`pz}b`c`)ca6m%XAI*$aMM}y8|LFe(H^F+{j zGUz-Nbe;}6&jg)kgU)k7=lP)XLeP0J=)4qkUJg311f5rd&TB#E^`P@c(0McHycKlb z4m$4yop*!IdqL;@pz}e{`7r2w6m&igI-dlcPlL{9LFeCPehxbGg3kP)vmod!3_6Q~&f=i+OVIf>==>IRmIR%pL1$Ue zSsrv&1f7*ZXI0Qyo$TcLslpoNwaV+1*DL?7{D<$k$HI-{A*H*5hTvxfCa((3n$_ zd9d;j<)O;Ml!q&iP(Dui_@wd8dt}fV6?9GrIwuC5(LrZS&>0(a#s!`6K_@flOb9v? 
zgU+O&Gdbv-6m(8bc5>YxPf?zte5&$k%BL%zp?s$DS;}WCpQC)P@_EYVD^FFPraV1q ztd9$V&WxaQVbHlK=v*9hW(J*0g3hHuXI9X;Ea+SwbY=&gD}v6PpmSx=nHzMj3OZK@ zooj;5wL#~)pmTlDxgqG>7<6t5IyVQMTY}E5LFcxhb9>OaBk0^2bnXf|cL$w&g3i4` z=f0qGf6#d#=scM0jfA5(r@`3dDGm7h|6TKO5}XO*8*eqQ+nbamETf+TlpR3ca`5$eqZ?mY6QC2J(a;!ZEHn-p4`o6Vpo!2VXfkvXbTV`bGzB^pIt@AHgp9v2f7lP3ta_W4P66W3tb0Y z58VLW2;BtT4BZ0V3f%_X4&4FW3Ec(V4c!CX3*86Z4?O@q2t5Qn3_Sup3Oxor4m|-q z2|Wcp4Lt)r3q1!t54`}r2)zWo47~!q3cUus4!r@r3B3iq4ZQ=s3%v)u4}AcA2z>;7 z41EH93VjBB4t)WA34H~94SfTB3w;ND5B&iB2>k^849$b)LkpmV&?0Ct^b7PW^c%DU zS_&QE~6jVA}JO8_q9dUbRpnvxIbKfP);IkINMSb%%OD zJ)vIE5m0Za57ZaRfR2RvLH(ft&{5FQ&@s@l&_HMqG#DBJ4TXk5!=Vw-anSM5NN5yv z0(2rY8X5zQg~mbSp-gB3G!dEvO@>Z_PKHi_ra-4cr$MJfXFz8{XF+E}=RoH|=RxN~ zQ=w_lbm#(T26Q2G5p*#$6S@St6q*HH23-!#hOU6-KvzO@p{t;)p=+RPq3fXQp&OtZ zp_`zapp|7B?p>LpXq3@vYp&y_hp`W0ip?T1JXaTekS_CbIet~|4euI`kOQB`Z za%cs#5?TeVhSor9p>@!D=y&K3=uc<^v=Q0_ZHBU-El^ta-`#BeMwAcA4;6q4LWQ8h z&>qm9P!XsoR17K(r9&m4l29qAG*kvE3zdWRg33dCL;FA#pnakJpo&l>s4`RqstQ$u zszWuPnoupMHdF_y3)O?_Lk*yY(Ed;(=m4lO)C6h@HG`T%EuaITmQX9GHFOZv209oz z1UeK-KoQgyY6rE4IzSzvPEcp43)B@l4C)3Q4t0lmKs}*e&=F8?s1MW^%7Bi9`a#LR zBix#21E8azqoHG0!@KVg-(M`ht7b`gwBG_hR%V`h0cS{ho(Z)py|*B&|jUqW9&UqjzO z-$LI(-$Oq@KSDo2KST4N`OpGrA+!iu4E+NA3jGExftEtcpykjCXeG1?S`Dp%)g3{`=uLe-$^P>pPD&+kd|8q2@X&1>}T zzJ2R=lA8EbYC*N3I#6Ax9#kJ{05yd6hZ;c#K#id$P*bQG)EsI79SF6AT0yO$gP=Ch z!O$Vlp-=*fptevus6EsH>Iij$IzwHcuFzpnH|TJvJJbW}3H5@GfOJJToj)IPcj)9Ja210|N!O#$BC^QTj4vm10gN}zrLZhG)pcA3d&=_beG!7aMWkM66 ziO?iyGISDjGIR}*33QdEiLl;0ZpbMdkpo^iI z&?V5N&@AXO=yGT_bOkgAx)PcTT?JhYT?1VUT?btc-2mMP-2~kX-2&YT-3Hwb-2vSR z-38qZ-2>eV-3Q$dJperjJp?@rJpw%nJqA4vJpnxlJq0}tJp(-pJqJAxy#T!ky#&1s zy#l=oy#~Dwy#c)my#>7uy#u`qy$8JyeE@w3eFS|BeFA+7eFl9FeF1$5eFc3DeFJ?9 zeFuFH{Q&(4{RI6C&4cDc3!sJ2B4{!63-l}W8?*#k3N3?{Lo1+_&?;y(v<6xWt%KG> zze9gOe?l9ejnF1&Gn55wfzq=74IW#+7372RLj|CMP$8%=v7u5M zo41-!0xAiWf=WYWpt4XoXfLQdv^TU5Q~}x-+7GG-Re~x*RiLU+HK;mN1F8wtf@(u` zpt?{!s6NyHY6$HQHG&R+8beK>rcg7eIn)9=5NZjvf?7idL2aOep+lfUp#&5`ZJ~Bh 
zd#D4{5$XhWhPpsqp~IkV(BV*bs0Y*&>IEGE^@jRDeW48KNT?r_{5KM%@+_4M1E8az zqoHG0!@KVg-(M`ht7b`gwBG_hR%V`h0cS{ho(Z)py|*B&YoT?}dgyoP z59m*51GEv^1Z{@0pe;~Z5q$rH@0r-Fti7>CsYI~3KfHjL+MZns3cShDh-u^ z%0lI!y`b{Y-q1c!1!!MrKd2&9391ZLfvQ5)pz2T!s3uekstwhF>O%FP`cMO?A+$f# z2s!|23^jq8Ld~G&Pz&fls3p`2Y7HF(wSf+X4uKAZ5>N!Sh1x;wp$H>9z z4uiTuheO?=9#BuH7jy*F8|nk~g)*Qcp?*+*XaIB+bTo7fbSyLw8Uzi7hCoB1VbE}B z1aus9JTwv-1)Tt$2#touKx3hC(0C{lngC6NCP9;-lc1BKQ=lo(snBWA>ChR_nb29# z+0Z%AxzKsg`Os8o8Z;fc0Ga_^2wenS49$ctfi8t+L6y5$`UCnC+5l~YHbI-AENBaqRutd=p!`q) zs324bDh%xb?FkiuibBPp;!rwN0xAiWf=WYWpt4XoXfLQdv^TU5Q~}x-+7GG-Re~x* zRiLU+HK;mN1F8wtf@(u`pt?{!s6NyHY6$HQHG&R+8beK>rcg7eIn)9=5NZjvf?7id zL2aOep+lfUp#&5`ZJ~Bhd#D4{5$XhWhPpsqp~IkV(BV*bs0Y*&>IEGE^@jRDeW48K zNT?sw9~uB11sx3?104$uga$!_p&`&vXc#mc8UY;#9S@C!MnNY)Cqkp4G0<3O95f!v zgeE`}p-Ir>Y-RtibLRXz&rZVDlc7_fDbT6VY0&A=8PJ)~SXLRA?GB z9l8LT0bK}P1YHcxgf4+Dg=RsQL6<|bp(~&{(3Q|!=ql)H=o;u+=sM_n=mzLU=qBi9 z=oaW!=r-tf=nm*k=q~7P=pN`^=sxIv=mF?K=ppD~=n?2q=rQPV=n3da=qc!F=o#o) z=sD1N=pE=?=soCt=mY3O=p*Q3=o9Eu=ribZ z=nLpe=qu=J=o{!;=sW0p=m+S>q_Y2tFs*r$*3FwWZa(2B-S}BI=IO?K-B_R-3w2|W zZYPH;U*+QQat}8^v`a zT{lYTMoHZ$r5mMnql|8p)s1qxv6pU?*Nwe(V;|k9pd0(@#(ug{Q8y~-MrGZoq8n9p zqnd71*NqywQByZ+=|*kcsG}Qob)%kc)Ypv$y3tTK_ScO@x^aMRG}etKy3tfOn(0P! 
z-Dsg32kJ&k-Dss7t##ud-Dsm52kXWmx^bv(By=OvjkdbcPB+@?MhD&Ks2iPhqqA;w z(T%RUahPs&(~ZM*qq}bO(2btD(Mvau(2d@@(MLD>>PCic9H|@qbfdp+4A70EbmM5< zI7T;))s2C=F-SKC>&6h>7^)k?bYr+~jL?nabmMs47^xehbmIiwI8ir7>&6(}7^@rO zbYr}3Wa`EQ-I%BwlXPRUZk(hWC+o&3x-msJPSuUmbmMg0I72ti)Qz)r<80kHM>o#Z zjq`NleBGF;8`E@Sx^7&c8#8p{LfyDXH!jwVnYwX_Zd|GxvvlJ!-MCygX6wckx-myL zuGEdWx^b0mT&)||=*G3lM$XTXu7j?JZh&rtZh~%xZh>xvZi8-z?tt!u?t<=y?t$)w z?t|`!9)KQ%9)cc*9)TW(9)li-o`9Z&o`Rl+o`If)o`ar;UVvVNUV>hRUV&bPUV~nT z-hkeO-h$qS-htkQ-h?2 zeu936=0WqJ1<*oh5wsZk1^N~G4O#*%g_c3fp%u_dXce>?S_7?x)sVy4>f=qLi`S0Yy+-s2$WE>Hu|wIzgSGE>KtKFsK`J zIMf~L0riA>K}SHnp*~PwC<8hY>Ie0Q20%waM?=Rz$3g?4LC|1m2s9KL1`UTsK*vGH zLnEP4&4xIs=37rL<4V?p> z3!Mj@4^0)N{=3T4py|*B&|jUqW9&UqjzO-$LI(-$Oq@KSDo2KST4N`OpGrA+!iu4E+NA z3jGExftEtcpykjCXeG1?S`Dp%)y$g0#${oLDiueP)(>7R2!-T)rIOo^`Qn(Luh}f5p)357-|AFg_=Rlp%&1AP)n#4 z)EYVnY6Be%9ReK+C7=ju3$=sVLmi-wqOKE0O`OmP>I`*(xCnhv<_Mi{SN&B{RwS=HbR@A%}^G!1xhR7zyGD49P>f>p#o4rs1Q^b+5_4X zDgqUSib2Jpbf^SW5-J6ihRQ%?p>oh(P9me1#}?P5^4pth7N+-KnFvIK!-vJ zD1zET?V$Eh2dE>|3F-`Wfx1G6LEWIkq3%!*s3+75Is)nq^?~|A8PJhXKd3)606Gdf z8af6#He3F`<(SIFf!I0-8Vn7AhC;)j;m`=^IOuq2Bs2;-0Xh*H4UK`uLgS$EP$o11 zng~sTCPODdCqt(|Q=n6!)1cF#GoUk}v!Jt~bD(pf^Puygsn9fNI&=Xv1G*5p2)Y=W z30(qR3eAEpgD!_=Lsvj^pev!d&{fdY&^6Gt&~?!D&<)Uy&`r?I&@Ird&~4D|&>hg7 z&|T2o&^^$-(0$PT&;!te&_mF}&?C^J&|}c!&=b&;&{NRU&@<4p&~wo9&PU3&|A>k&^yq((0kDP&oh(P9me1#}?P5^4pth7N+-KnFvIK!-vJD1zET?V$Eh2dE>|3F-`W zfx1G6LEWIkq3%!*s3+75Is)nq^?~|A8PJhXKd3)606Gdf8af6#78(c*f(AoFprOz( zXgD+iIu1G>8VQYpPJm8?MnhwuvCueZJd_DdfF?qdpvlll(8jUGV}`cD)bukI`jtgCiE8cHuMhkF7zJs zKJ)?fA@mXSG4u)aDfAiiIrIheCG-{aHS`ViE%Y7qJ@f<&Rm4r$`rJ*uVS*RSe7gQeF8`=k|0PPFy2UUbB zL6xB@P*tcJR2`}T)r4w6wV^stU8o*ZA8G(Kg!YFTK?gvMp(apMs2S88Y5^SxwS-zh zt)YXUHqgP)A<&^v0*auvP&=qS)B)-Ub%HuWU7)VeVNf^daHu=f1L_I&f{uWCLw%sW zPzH1))DP+p4Sh$t)VuMC)5^d2YEr>PHzsd9idK; zALI`OK!H#Y)ENqfLZDEn3ls)*g~Fi-s2kKB>H+nHdO^LRK2Tq%AJiWj01bo&L4%aLit%O!VtD!Z}T4)`#9@+qHgf>B&p)Jr>XdAR0+5zo^c0s$L zJr}%jN2RTD7kSpW{MS-G1(V*y13@9cP3yKZJf#O2(p!iS% 
zC?S*xN(?1|l0wO#&y*fPA5jP$$R_@`nPTKqv_63JJTo210|N!O#$BC^QTj4vm0DLZhJ3&=_beG!7aMO@Jmslc34a6lf|m4Vn(kfM!Co zpxMwIXf8Alnh!027D9`l#n2LHDYOh)4y}MzLaU(F&>Cnhv<_MiZGbjHo1o3m7HBK9 z4cZRvfOba8Ir9H3;U2t8dbji*>AlkXr1wi7kUl7VNcyn!5$U7S$E1%-pO8K&eMATYRr0+{VkbWrrNcyq# z6X~bY&!nGAzmR?@{Yv_^^c(57((k0-OMj65DE&$Lv-B6~uhQS7zf1p+{we)S`nR-G zg8%y*`A6DW+C|z`+D$r&bX4hR($S@3NXL|pB^_Hjj&xk15K$rBg_!lujj`S~`t%TIqDs>7_GBXOzw)omo1IbXMtX(%GeRNavK!C7oM3k91yX zcj0;8wrAtVclrAM*TDpvMS?O}p<)te~SNzj|-l6sH zSC~q2sVrSZx~gFUxoq-#polCCXXN4lhn-9F&}!qnhtn@hP@zN8dCrVF}o-92@daCp^>FLrlq-RRclAbL+M|!UGJn8w;3#1oHFOps? zy+nGc^fKw?(krA_O0SY$Exks1t@JwS_0k)pH%f1k-YmUEdaLv{>Fv@xq<2d1lHM)7 zM|!XHKI#3^2c!>5ACf*SeMI`G^fBq<(kG-(N}rNGEqzA%tn@kQ^U@cjFG^pMzASx3 z`l|Fb>Fd%rq;E>!lD;i{NBXYxJ?Z<>52PPTKazed{Y3hy^fT$_(l4Z6O23kRE&WFN zt@JzT_tGDvKT3a+{w)1P`m6Le>F?4%q<>2PlKw62lu%v)NIOfrNV`hANk@^6DjiKa zx^xWbn9{MNV@t=8jw>BcI=*xQ>4egWq!UXgkxnX|Ogg!A3h9*6siad&r;$!8olZKv zbOz~+(wU?)OJ|YJDxFO_yL1leoYJ|Zb4%xu&MWOMoliQyw1;#7>4MUQqzg+IkuEA- zOuD#q3F(s3rKC$smys?jT~4~ZbOq^((v_quOIMMuDqT&wx^xZcn$oqTYfIOWt}9(n zy1sM+>4wscq#H{&k!~v8OuD&r3+a~9t)yE^w~_XgZY$kR+DqD7y1lfIbO&i)>5kH! 
zr2VA*r30h`rGun9O9xAbNQX*ykq(pYDjhBzA>B>7yL1ofp3=RfdrS9`?kn9-y1(=Q z>4DONqz6k6ksc~NOnSKV24nmZq!&vskzOjjOnSNW3h9;7tE5*;uaRCWy-s?)^akmT z(wn3=OK*|hD!om5yYvp}ozlCccT4Y)-YdONdcX7m>4VaTqz_9Ukv=MYO!~O=3F(v4 zr=(9ypOHQ*eNOtk^abgQ(wC$!OJ9+`Dt%4*y7Ud{o6@(WZ%f~izAJrC`o8o7>4(yf zq#sK^k$x)uO!~R>3+b2AucTi~zma|`{Z9J5^atsW(x0S1OMj96D*a9RyYvs~pVGgi ze@i%bS&xE(s88YO2?CqFP%U-p>!hY#L`Km zlS(I(PA;87I;C_f>D1C`q|-{LlTI(4K{}&!Ch5%5S){W{XOqq@okKdObS~-K(s`uw zO1n$vlg=;gAzeVapmZVW!qP>gi%J)hE-qa{x}C)0=q{~W|lP)h^LAs)JCF#o2 zRivv*SCg(TT|>I2bS>%H(siWkO4pOFFWo@8p>!kZ#?nosn@Ts6ZZ6$Ix}|h0>DJP1 zq&=nEO1G2tlJ=HvFYP1ULE2ZkqjV=}KWTsI0O>&KAnDH1!O|hpq0(KX!=$@Phf7CD zca!ce-9x&kbT8@N(tV`+O81lQFFindp!6W=!O}ydhe{8V9xgpXdZhFy>Cw_-q{m8+ zlO8WUL3*O}B3R#Pm`W5Jwtk?^epMw(sQKeO3#y?FTFr|q4Xl@#nMZpmr5^_ zUM{^tdZqL#>DAI}q}NKXlU^^qL3*R~Ch5)6Tco#2Z!_r5jk4hhtJ}!Mi`lR$J>C@6@q|Zv9lRhtfLHeTfCF#r3SER2>Uz5Hr zeM9=D^eySz(s!iqO5c;dFa1FJq4Xo^$I?%vpGrTIelGn&`la+M>DSV4q~A)vlYTG# zLHeWgC+W}9U!=cEf0O<${X_bv^e^e((oTux`Mw3~Dk>8R4tq@zp6kd7%G zOFFi69O<~y@ucHRCy-7kok%*dbQ0;L(#fQgOQ(=dDV<6>wR9Tkw9@IM(@STN&M2Kp zI8#S(q_a!skj^QcOFFl79_hT&?$Y_B^Gkb37mzL}T}ZmHbP?&I(#52UOP7!? 
zDP2msv~(HiveM8jGzq^nEUkgh3ROS-mn9qGE#^`z@dH;`^9 z-AKBzbQ9^O(#@osOSh12DcwrCwR9V4PwBSO?WDb=y`|er`$%_?_Lc4^-AUR{+Fv?A zI#4=Dy0dh!bcl4QbQkF`>8{e@(h<_#q`OP^knSnnOS-poAL+i*{iOR#50D-xJxF@6 z^bqNx(!->OOOKEqDLqPhwDcJ1vC`wD$4gI;o+v#@db0Es>8aAwq^C>Ike(?$OM15S z9O=2z^Q7lXFOXg+y-0eo^b+Z%(#xcmORtb#DZNU1we%Y4wbJXP*Gq4Z-YC6Edb9Kv z>8;Y+q_<1&klrc1OM18T9_hW(`=s|vACNvMeMtJS^bzT!(#NEaOP`QFDSb-%wDcM2 zv(o3J&r4sBz9@Z3`m*#D>8sM$q_0cgkiIE>OZv9-9qGH$_oVMjKahSX{Yd(;^b_f) z($A!yOTUnQDg8?Nwe%b5x6<#V-%Edx{wV!P`m^*G>95k?q`yo5kp3zCOZvC8QxbXp zFYPStBJC>eCLKjOs&q8z=+ZHyV@k)8jx8NWI<9m)>G;wKq!UUfl1?n0L^`Q-GU?>f zDWp?Mr;<)BoklvXbUNwu(ix;PN@tSJES*I=1>c$CDaOP4Yh$h zp|(&v$P4m@+Cx522gn!d2z7$|Ab%(T3WS27&QLHE0);|dpfIQ_6b?l|-JtGJ52z>9 z3+fH^f%-!Ip#IPRXdpBQ8Vn7AhC;)j;m`0p!?7R=ppn7dJH{*o(8Y6>-j znnNw1mQX9GHPi<3gxW&wATP)pY7hB99Uxz*Bh(4Zn&?V?HbOpK!U4yPeH=vu)E$B9M2f7R0gYH8Qpoh>S z=rQyJdI~**o?|iJ>G=QYaae97+MDgi=AN zp)^ohC>@j@$^d1AGC`T4EKpV`8y$g0#${oLDiueP)(>7R2!-T)rIOo^`Qn( zL#Pqd7-|AFg_=Rlp%zd}s1?*2Y6E#fZJ~CM7vv4KhkT$8kT28`>IC^g{!joE2n9i% zp?S_7?x)Vz3?S=M1`=JBS zLFf>47&-zSg^oeTp%c(a=oEAsIs=`B&Ozs)3(!UA5_B250$qi!LD!)h&`szTbQ`(@ z-G%N!_n`;SL+BCo76g=oR!DdIPQD`+CR7Wm z4b_3_LiM2fPy?tT)Cg(}HG!H!&7kH`3#cX33Th3tfjptMP&>#A@`l<&K2Qh97wQOg zg8U$VC;$qCf}qY&FcboXLS3LRs4El>ML^x4?obb?C)5k-4fTQgLj9or&;V#4Gzc0D z4S|M2!=T~N2xufU3K|WKfyP4Ppz+WIXd*NTnhZ^Wrb5%8>Cg;lCNvA04b6e(Li3>c z&;n>7v(CA8CUgtB4c&q6LieEi&;#fp^ay$kJ%OG=&!Fef3+N^E3VIE_f!;#zp!d)R=p*zA z`V4)6zCz!i@6ZqEC-e*Y4LPO2>p#dDa)DeSHz*1e6^aH$hhji6p;%CCC=L`CiU-Aq z5C`tuI8|Gk4O39d<@WKeP_1(Xs>1*L}4Kxv_LPlo84VWrngqS)puD zb|?pw6UqhUhVnppA$KSrlpped3P1&+LQr9-2vig*1{H@&KqaA4P-&H>v9U7>I&0_p~Jhk8Ig zpIe0Q20#O$LC|1m2s9KL1`UTsKqH}1&}e83G!_~MjfW;c6QN1aWM~RB z6`BT3hh{)Cp;^#uXbv`*reF3*rM2~*rwR7*rC{|*rnL5*rV91*r(X9IG{MFIHWkNIHEYJ zIHowRIH5SHIHfqPIHNeLIHx$TxS+VGxTLtOxT3hKxTd(SxS_bIxTUzQxTCnMxTm&oBDo@kBBdggBDEroBCR5wBE2GmBBLUcBC{fkBC8^sBD*4oBBvskBDW%sBCo<- zkx!9d;h`v?D5xl;D6A-=D5@x?D6S}>D5)r=D6J@?D61%^D6goXsHmusH&)@ zsII7?sHv!>sI91@sH>=_sIO?CXsBqUXsl?WXsT$YXs&3XXsKwWXsu|Y@Km%_v{QH~ 
zycO*gK8g+sUqwenCxxHFUlE`PR0JtHD}ogvicm!tMVO+iB3u!n=%(nd=%MJT=%whb z=%eVX=%?tf7@!!a7^E1i7@`=e7^WDm7@-)c7^N7k7^4`g7^fJon4p-bn53Ajn4*}f zn5LMnn4y@dn5CGln4_4hn5USpSfE&_Sfp62SfW^}Sf*I6SfN;{SfyC4Sfg00Sf^O8 z*r3>`*reF3*rM2~*rwR7*rC{|*rnL5*rV91*r(X9IG{MFIHWkNIHEYJIHowRIH5SH zIHfqPIHNeLIHx$TxS+VGxTLtOxT3hKxTd(SxS_bIxTUzQxTCnMxTmDNRgq1RU6DhPQ;|!NTaibRSK+S6r^v7HP!v!U zR1{JaRuoYbRTNVcSCmkcRFqPbR+LecRg_bdS5#0`R8&$_R#Z_`Ra8?{SJY6{RMb+` zR@70{Rn$||S2R#GR5VgFRy0vGRWwsHSF}*HRJ2mGRAKQ1n#vQuJ2zQS?>xQ}kC1 zPz+QIQVdoMQ4CcKQw&#(P>fWJQjAuNQH)iLQ;b(kP)t-zQcPA%QA|}#Q%qOPP|Q@! zQp{G&QOs4$Q_NQ^P%Km|QY=<1Q7ly~Q!H1kP^?s}Qmj_2QLI(0Q><5PP;69eQfyXi zQEXLgQ*2l4Q0!FfQtVdjQS4RhQ|wn9P#jboQXEzsQ5;nqQyf>EP@GhpQk+(tQJhtr zQ=C^^P+U}8Qe0MCQCwAAQ(RZvP~249QruSDQQTGBQ`}cPP&`yTQan~XQ9M;VQ#@C^ zP`p&UQoL5YQM^^WQ@mGvP<&K;QhZi?QG8W=Q+!waQ2bQ zh@yz9h^C0Hh@pt7h^2_Fh@*(Bh^L6JNT5ilNTf)tNTNupNTx`xNTEonNTo=vNTW!r zNT*1z$e_rm$fU@u$fC%q$fn4y$f3xo$fd}w$fL-sa98A0L}_e>M80g z8Ymhn8YvnpnkbqonkkwqS}0m7S}9s9+9*5~Z58blUJ7qTdxejpgThzQQPD}^r|?$< zC;}Bhiq48)MTjC)(M1ua=&A@;L@2r`x+{7pdMbJ;dMo-U`YQS<`YQ$~1}X+A1}lar zhAM_BhAT!WMk+=rMk~fB#wx}s#w#W$CMqT=CM%{WrYfc>rYmMBW-4YWW-I0><|^hX z<|`H`7Ah7g7Aux0mMWGhmMc~$Rw`C0Rx8#h)+*L1)+;tBHYzqLHY>I$wkozMwkvih zb}Du$b}RNM_A2%%_A3r34k`{Q4l9l*jw+5Rjw?I4fKft_n9r z6h%};G(~hp3`I;uEJbWZ97SA3JVks(0!2baB1K|F5=Bx)GDUJl3PnmqDn)8V8bw+~ zIz@U#21Q0iCPijN7DZM?Hbr(t4nLQzstN>N%-Mp0H#PElS_K~YgrNl{r*MNw5zO;KG@Ls3&vOHo@3P_am{Sg}O0RIyC4T(Ls2Qn5<0TCqm4R*3QL#y}S+Pa2Rk2O6U9m&4 zQ?X02Td_y6SFumAUvWTjP;p3cSaC#gRB=pkTya8iQgKRgT5(2kR&h>oUU5NjQE^Fe zS#d>iRdG#mU2#KkQ*ldiTX9EmS8-2qU-3ZkQ1M9dSn)*hRPjvlT=7EjQt?XhTJc8l zR`E{pUhzTkQSnLfS@A{jRq;*nUGYQlQ}IjjTj7*eU;ika6)p-_g_|OZBB~;qBDx}m zBBmmiBDNxqBCaByBEBMlBB3IYBC#TgBB>&oBDo@kBBdggBDEroBCR5wBE2GmBBLUc zB6B3%{=74^YCx#BUvL&OE18YVPUawUlDWv-WF9gv=}zV&^OGKA0kR-jh%8JNA&Zj5 z$l_!PvLsoGEKQan%aY~D@?-_FB3X&7OjaSQlGVuSWDT+=S&OVq)*_~PZ{YZZ@fD9yq$j)Ri z8A67VUC1!9D;Z8kklo1cWDl|@*^BH=_96R{{mB000CFHXh#X7~A%~K~$l>G&awIv5 
z98HcP$CBg7@#F+@A~}hiOim%ElGDiPwA)k`Z$miq>@+J9-d`-R~-;(dh_v8oiBl(H^OnxE1lHbVh9k;TapWJ$6VS(+?EmLyh=z24q9B5!sk*LN+Cvk$sS}+vKQH#>_he?`;q<00pvh(5IL9} zLJlQ|k;BOm{0<5xJOLLM|njk;};y&Xq|MsgFmncPBdCAX2=$sOcQau>Oq z+(Ygq_mTU_1LQ&S5P6t9LLMcLk;lmsw~z9rw0@5vA3NAeT- znfyY2CBKp1$sgoT@)!A=bV|?HzoawiLb{S}WE3(g8I6oi#vo&ovB=nD95OB$kBmyu7A?uR$$ogahvLV@sY)m#Go084Q=41=9CE1E>O|~ID$+l!W(u?#a z+mk+I2hx}9NOmIqNPjYb3?zfd&SWqdLWYuE$S|@i8BRu!-N^1_53(oOi|kGIA^Vd3 z$o}L2av(W~983-&hmym{;p7N%Bsq#4O^zYQlH~av`~hTud$@my*lK<>U%-CAo@RO|Bu=lIzIz=0@)7Tk$tUDf@)`M@ zd_le>Uy-lLH{@IL9r>R8Kz<}Yk)O#g~qze%SId<9H8lP;ty=|)B& zqmt3c=wu8sCK-#2O~xVPlJUs+WCAiFnTSkGCLxoO$;jkn3Nj^`icC$WA=8rS$n<0e zG9#IZ%uHq>vy$1!>|_oyCz*@PP39r^ z$kJpPvMgDSEKgP-E0UGS%48L?Dp`%JPSzl6lC{X%WF4|DS&ytwHXs|4jmXAi6S67U zjBHM}AX}2H$kt>V(vxgUwj;erZ?Zk1TgUG?;5OOFvj2upmAV-p;$kF5&ax6KH z98XRlCz6xM$>bDrDmjguPR<}_lC#L!~5^^cIj9gBxAXk#B z$kpT;axJ-zTu*KwH@-6v}d{2HLKa!uw&*T^KEBTH5PW~W&lE28`q*F$| z{w1AB7t)nCo7N@$x38pvI<$1tVUKRYmha`T4Zgq4q2D1N7g4B zkPXR3WMi@k*_3QXHYZz_&DcdyqZJUSw~w580RONA@QNkORp<oJLM3XOJ@^8RO6Qamohf+TedZpZLMWn zXW7PTedfr?X6{dXW8Ccwhxx=qhHECv}~m;TWQNy#QY~?IldCOM8vQ@Nfl`LCj%T~p* zRkdu@EL(NUR>QK@v~0C3TW!l$$FkM6Z1pT#eaqIsvNg19jVxPZ%htrQHMMNbEL(HS z*21#2v}~;`TWibK#KCqb%EK%QnWcjkRp!EZca?Ho>w@ zv}}_s+hof&#j;JcY||{;bjvowvdxU#qW}5(zdD{?|NZMfbDC{VbIfV3In6Vt`R261 zoEDnXB6C`7PD{*bsW~k(r{(6f!kku`(<*aXZBA>JbJ}fAd(3ICIqfs2{pNJQoDQ1PA#*xxPDjk?s5u=or{m^y z!kkW;(8v@OGpF8&}vGpG0F z^ue4yn$st9`fN^L%;~E+eKV);=Jdmyewx!SbNX#gPMM7_tpAvkvpKn#ldCzonNt*V zifT^L%qhA##W1Is<`m1EVw+PObBb$D@ysc{IVCWsgyxjUoD!Q;5_3vwPRYzExjCgU zr>l*ybjn^P8Z%4$y8%qhD$qeC4>hq{gq^&B1QJ32IQbZF@4 z(8$rDv7$A;{68v!g?>qeFRPqc74PIkTYt3n$IjuLR4d%4boHm)$ zW^>wNPFu}sn>lSaryb_B)0}ph({6LxV@`X`X`eamH>U&UbkLj*nbTo&I$};o&FPpq z9XF>F=5+GU^yl~Y@MGtc`Olp;r!(es)|}3n(|L2cU``j!>5@5JHm57*bk&@$nbUQ1 zx?xT?&FPjo-8QE?=5*Jb?wQklb9!J-56$V3IXyO~C+76joSvD}b8~uOPA|>rl{vjO zr#I&G)|}p%(|dFJU``*+>61BqHm5J<^wpfcnbUW3`e9B#&FPmp{Wd43EXEgsf6U3* zoLtPw)tubSDT+BoHK%Ci6y2O+m{Ux1ie*l*%_)vK#WkmR<`mzY5|~p$b4p}RiOngA 
zIVClxWagCIoKl!mN^?qOPN~f)jX9+?r*!6&-kdU+Q$};jWKNmQDT_H}HK%Oml--rcg7e zIn)Ab3AKV+Lv0{Ws4dhE@`Ajf_K*+M0rG`9LY*Sz7WsMP__^O0c0Q>_Uc-UCD4Vg6u|iCwq`R$zEh{vJcsp>__$|2ap5FLF8a^2sxA-Mh+)OkR!=a zvy$1!>|_oyCz*@PP39r^$kJpPvMgDSEKgP-E0UGS%48L?Dp`%JPSzl6lC{X% zWF4|DS&ytwHXs`+!-5-;jmaitQ?eP^oNPh1BwLZK$u^`X*_LcadXe5_d(wyOK>Cs$ z$xfsn=}!ibfn*TbnG7aF$WXFNB>(e0qCc<7!~XOA#V~W~YEI$i6k$%?%&EIM^)RQN z=G4oadYe-pbLwkO{miMqISnwUf#x*GoCcfI5OW%8PQ%P;xH*k5r;+9~%A7`<(-?Ca zYfj_LX}md2FsF&;G|8MMo6{6?nrcqd%xStg%`m5#<}}NkW}DL-bDC>T^UP_!IV~`! zh32%#oEDqY5_4K=PRq<`xjC&cr{E^>l($nMzyZdtZQ`Z8pFEAw63wNYi#Qp$GXO~uJNpEeCwLP zx+b))iL7hl$Tjl2_3i%ruG8QoP*NxvlpIO{rG!#Jsi8DbS|}Zq9?Ae^gfc;yp)62V zC>xX=$^qqsazVMFJWyW99m)sghdiJHP(i2=R2V7(6@`jH#i0^VNvIT58Y%;og~~zY zp$bq%s1j5essdGoszKGE8c)A_xlzLGz&n&_ZYtv=~|fErpgr%b^v}N@x|d8d?Lbh1Nmq zp$*VRXcM#<+5&Bbwn5vW9nel_7qlDN1MP+OLHnTt&_U=BbQn4U9fgiT$DtF@N$3=G z8ae}=h0a0ep$pJO=n`}px&mE=u0hwK8_-SY7IYiB1Kox0LHD5t&_n1E^cZ>qJ%ye@ z&!HF4OXwB!8hQi0h2BB$p%2hU=o9oA`T~80zCquiAJ9+e7xWu)%C4_${(+n!7swTI zgQ7rDp=eNaC{R*HzYa zwRK%%UDsOIb=Gygb=_cHH~v}wyi42oFTBb6Pj9xaTdeC=>$=UlZnv&Gtm{tey34xm zwyt}u>t5@+&${l9T%-T_sQkYV$phwe(3}pL(_wQuVopcR>6kekH>VTkbkdwonbT== zI%7^}&FP#uoj0co=5*1VE}7G1bGl+qSIy~~IbAoW8|HM=oNk%ZZF9O~PIt}eo;lq& zrw8Wr(3~Eb(_?dbVop!Z>6tk_H>Vfo^wOMOnbT`?dSgy+&FP&vy*H;1=Je5=KAF>J zbNXUVU(M;8Iej;$ALjJaoPL?pZ*y|WVLS}|V@}TI)=HzBhQOqf-IYl$4=;jo| zoMM_&EOUx&PI1gBt~tdsr}*ZSz?>4AQzCOpY)(ncDXBRnGpFR{l){`+no}xsN^MSQ z%qguor8B4W=9IylGMZB+bINQ^SL^ zBlD9UWC5}uS%@r579oq0#mM4h39=+viY!f*ASPVF zCRvNDP1YgnlJ&^?WCOAx*@$dRHX)mm&B*3t3$i8Iifm1`Aw9{qWINJ}^d{SrK4b^d zm+VM(BK=5zGJp&ugUHTgFd0IIl3mC!vMU)*Mv&de?qmw3r;^ji>EsM@COM0oP0k_b zlJm&<(2pxhBLr0*a&@t#ZbOJgFoq|q7XP~ptIp{oe z0lElXf-XZ>psUa|=sI)*x(VHaZbNsVyU;!8ex&~MJoZ1of93)CkbFcwCZCW`$!Fwq z@&);ld_}${-;i&~cjSBW1No8sM1Cf}kYCAfM$B<*m zapZV%0y&YKL{28BkWw<-1G$mhL~bUxkXy-ZwA)k`Z$miq>@+J9-d`-R~-;(dh_v8oiBl(H^OnxE1lHbVhyr)0hGZkMG1-J{N;V^#lP$=WWGk{Y*@pBa+mh`_FVdTAPx_D@NMEue*@^Tc z{mB3_kPIR_lfh&N8A^5`!^o~=I2l29BfFD5$ev^`vNzd>>`V3|`;!C6f#e`^Fgb)A 
zN)983lOxEHk#zp^-9wN3!K0){OOKHrD?Ltnyz~U=iPDp#CreL}o+>>}db;!s>6y~A zq-RUdk)A6(PkO%e0_la)i=-DzFOgm zH%o7k-YUIKdb{)v>7CNMq<2g2k=`r4PkO)f0qKL%holcnACW#PeN6he^a<&c(x;?P zOP`TGD}7G-yz~X>i_({*FH2vMzAAl9`nvQD>6_BGq;E^#k-jT^Px`*}1L=p-kE9<< zKaqYa{Y?6~^b6^i(yydnOTUqREB#LTz4Qm^kJ6u{KTCg+{wn=V`n&WG>7UZSq<>30 z<^I3N*ngy*rCp?5rQM{XNJo{9CLLWmhICBnSkkd0JMyPTJo5kf3J?d13&n%tLkXaS zP$DQXlmto&C4-VfDWH^4DkwFS21*O1gVIA8po~x^C^M7=$_izJvO_tboKP+(HrQ~)Xn6@m&wMWCWkF{n6H0xAiWf=WYWpt4Xos612wst8qrDnnJEs!%nk zI#dIy3DtsXLv^6KP(7$V)BtJ-HG&#LO`xVwGpIS#0%{4hf?7juAWx_*)DH53yrK4x z57Ytjg*rl=AV0_-3V;HkAgD7G423|UP!}i+>I#KJ5l}a%JJbW}3H5?{Lw%sWP(P?Y zGyob14T1(kL!hD1Flab50vZ{q|Gxf_9xXjadaU#~>G9GNq$f&GlAbI*MS80AH0kNm zGo)w!=|Are{rBHsmRx2_&yk)hJx_YR^aAOH(u<@QOD~aLD!oj4x%3L@mC~!ES4*#v zUMsy$dcE`p>5bBxq&G`%k=`o3O?tcZ4(XlJyQFtZ?~&dsy-#|-^a1IE(ubrEOCOOw zDt%1)xbz9>lhUW8PfMSXJ}Z4r`n>c7>5Gy5zh{`2pv%w|=qhv#x(?ldZbG-9+t3~8 zE_4sN4?Tb$LXV)w&=cq>^bC3qy?|aqub|h^8|W?c4tfuLfIdQ>pwG}3=qvOM`VReo zenP*X-;h%t{mA+Ua)w+WSI7;D0!4+QLD8WYP)sNm6dQ^I#f9QQ@u37zLMRcG7)k;q zg_1$Zp%hR`C>4}CQvZHEN+X?CI-PWS=?u~tr87xqmd+xbRXUq=cIh0_Ii+(+=Z@_E zJznL3@LWQ8hP!XsoR17K}sek_rN=TQKE+t)Bx{P#L>2lKLr7K8R zl&&OQS-OgJRq1Nd)un4l*Oaa$U0b@2bY1Cs()FbqNH>&jB;8oLiF8xxX41{2TS&K* zZYAAXx{b7_bX)0m(q7Wu((R>vq&rCaN_Uj*B<&~dFC8Eq7}@{(oC$(DL%~o86bf~L z!l15DI1~YOgStaKpq@}Ks5jIH>I?OQ`a=VtfzTjmFf;@j3JrsXLnEM(&?sm$GzJpqbDtXm+IjeLS2aJy&|3^nB?B(hH>*NiUXOBE3|4 zne=k$71ArES4ppyUL(C$dY$xo=?&5wr8h}$mfj-0ReGEBcIh3`JEeC?@0Q*py;pjl z^nU3B(g&pvNgtLzB7Ic)nDlY!6VfN8Pf4GaJ|lfr`keH6=?l^qr7uZemcAl=Rr;Fr zb?F<@H>Gb$-;V76JwD%o?n3vV`_KdEA@m4(3_XFKLeHS*&?|iJ>G=QYaae97+MDgi=ANp)^q1Nd5cuC!KV9=?u~tr87xqmd+xb zRXUq=cIh0_Ii+(+=a$YRombjjI-hiYX%FcF(gmdpNf(wbB3)Fvm~?UJ64E6j`+tvf zrJ&MK8K^8&4k`~-fGR?jpvq7cs47$qst(nFYC^T3+E5*+E>sVy4>f=qLXDutP!p&r z)C_7KseeBgT1dB)ZYAAXx{b7_bX)0m(q7Wu((R>vq&rCaN_Uj*B<&~dFC8EqC>-CMekbYJOy(*30eNDq`ABt2Mqi1bkD zVba5;M@Wy99wj|mdW`f~>2cEIr6)*Fl%6C#S$c}}ROxBb)1_xf&y=1eJzIK?^jztA z((|PkNH3ILB)wRAiS$zGWzx%~S4gjvUM0O+dX4m2>2=cUr8h`#l-?x0S$d1~R_Sfh 
z+og9%@08vpy<2*Z^j_(G()*2lH) zq$^2Rk*+3PL%No99qD?~4Wt`MH<51EGLHwhK;H^|8}#kacR=3>eHZlI(Dy*!3w6UKM4I0^uy4PKtBro81&=NPe4Bj{S@@m(9b|W3;i7Q^UyCqzX<&j^vlq%K)(w8 z8uaVXZ$Q5Z{TB4wx|5Fw?~vXly+?YV^a1HZ(nq9^NuQ8DC4ENvob(0hOVU@QuSwsK zz9oG}`kwRy=||E}q@PK@kbWioM*5xf2kB4JU!=cD|B(JAIed7fousWuTa(6+Qk25c zP`J->scI3Iq{gjOJJZE28Ggq|z!}=uYo+Pi{7!%Ch-8oUS?TsBl~N_uQX?Lzl{%@H z25FQgX_gjgm9a8T#>)ipN}KqkT{@&wx};ks$z+)#Q)QY=ml-lsX31=sBXeb*%$J4g zP-M}P*oI!qtv*2FXfn2NG@0gToQb2!(i>0gPwt$Z+&O0pJLj66vl2V!*_-4_p5#k` z6iSg4ONo?9nUqU~EcpMB^3{gnxK*G=z}4pUb$WePp+ej$((fWLrhi>HVil{Flz=NV zyk=l!#41tjvr6>_ajVSUE^XBcE2!QYvC8$fc$O>l28pxWGrD}S)8WWe|F5N2R$@4^ z&CD?~*UUUK^UW+Uv(U^UGmFhEF|*XnGBeA~tT40E%qlah&GeXAYi6C9^=3Ah*=S~y znayUZ*R!;jdL<2esb~ANmwMI{d#V4avzPiSPL5YH3~!aj?2`Jp5%^n^=kFa>guUGR?lca%tv;pt7tF%Wmhl#l9YPshiSd^6VdAB hN(@J;nQ3OGo9Q+)!^|!-qoKr+^~S=nNHi4k{s#j>---YL From e4e3663e614b94ca81e92672e14463f22b975242 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 23 Jan 2016 16:09:07 +0100 Subject: [PATCH 09/50] Updated FindOpenCL for Intel Linux OpenCL paths --- cmake/Modules/FindOpenCL.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/Modules/FindOpenCL.cmake b/cmake/Modules/FindOpenCL.cmake index 2a4c583c..c3f4c105 100644 --- a/cmake/Modules/FindOpenCL.cmake +++ b/cmake/Modules/FindOpenCL.cmake @@ -34,6 +34,7 @@ set(OPENCL_HINTS set(OPENCL_PATHS /usr/local/cuda /opt/cuda + /opt/intel/opencl /usr /usr/local ) @@ -52,7 +53,7 @@ mark_as_advanced(OPENCL_INCLUDE_DIRS) find_library(OPENCL_LIBRARIES NAMES OpenCL HINTS ${OPENCL_HINTS} - PATH_SUFFIXES lib lib64 lib/x86_64 lib/x64 lib/x86 lib/Win32 OpenCL/common/lib/x64 + PATH_SUFFIXES lib lib64 lib/x86_64 lib/x86_64/sdk lib/x64 lib/x86 lib/Win32 OpenCL/common/lib/x64 PATHS ${OPENCL_PATHS} DOC "OpenCL library" ) From f0b3091cdbb4465d4a3dd63ade52f061c3be60be Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 24 Jan 2016 17:35:31 +0100 Subject: [PATCH 10/50] Added Python function to compute defaults for a particular 
device/vendor combination --- scripts/database/database.py | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/scripts/database/database.py b/scripts/database/database.py index 4ce26277..8d8f6c40 100644 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -20,9 +20,11 @@ import json import pandas as pd # Constants -ATTRIBUTES = ["device", "device_vendor", "device_type", "device_core_clock", "device_compute_units", - "precision", "kernel_family", - "arg_m", "arg_n", "arg_k", "arg_alpha", "arg_beta"] +DEVICETYPE_ATTRIBUTES = ["device_vendor", "device_type"] +DEVICE_ATTRIBUTES = ["device", "device_core_clock", "device_compute_units"] +KERNEL_ATTRIBUTES = ["precision", "kernel_family", + "arg_m", "arg_n", "arg_k", "arg_alpha", "arg_beta"] +ATTRIBUTES = DEVICE_ATTRIBUTES + DEVICETYPE_ATTRIBUTES + KERNEL_ATTRIBUTES # Pandas options pd.set_option('display.width', 1000) @@ -62,7 +64,7 @@ def ConcatenateData(df1, df2): def RemoveDuplicates(df): return df.drop_duplicates() -# Bests +# Retrieves the results with the lowest execution times def GetBestResults(df): dfbest = pd.DataFrame() grouped = df.groupby(ATTRIBUTES+["kernel"]) @@ -71,6 +73,21 @@ def GetBestResults(df): dfbest = ConcatenateData(dfbest, bestcase) return dfbest +# Sets defaults for devices of the same type/vendor based on the smallest values of all know +# entries. The average might be better for performance but some parameters might not be supported +# on other devices. 
+def CalculateDefaults(df): + dfdefault = pd.DataFrame() + grouped = df.groupby(DEVICETYPE_ATTRIBUTES + KERNEL_ATTRIBUTES) + for name, dfgroup in grouped: + defaultValues = dfgroup.min(axis=0) + defaultValues["device"] = "default" + defaultValues["device_compute_units"] = 0 + defaultValues["device_core_clock"] = 0 + defaultValues["time"] = 0.0 + dfdefault = dfdefault.append(defaultValues, ignore_index=True) + return dfdefault + # ================================================================================================== # C++ header generation # ================================================================================================== @@ -112,7 +129,7 @@ def GetPrecision(family, precision): # The C++ device type and vendor def GetDeviceVendor(vendor, devtype): - return(" { // %s %ss\n kDeviceType%s, kDeviceVendor%s, {\n" + return(" { // %s %ss\n kDeviceType%s, \"%s\", {\n" % (vendor, devtype, devtype, vendor)) # Prints the data to a C++ database @@ -132,7 +149,7 @@ def PrintData(df): f.write(GetDeviceVendor(vendor, devtype)) for device, dfdevice in dfdevtype.groupby(["device"]): devicename = "\"%s\"," % device - f.write(" { %-20s { " % devicename) + f.write(" { %-48s { " % devicename) # Collects the paramaters for this case and prints them parameters = [] @@ -200,9 +217,9 @@ SaveDatabase(database, file_db) # Retrieves the best performing results bests = GetBestResults(database) -# TODO: Determines the defaults for other vendors and per vendor -#defaults = CalculateDefaults(bests) -#bests = ConcatenateData(bests, defaults) +# Determines the defaults for other vendors and per vendor +defaults = CalculateDefaults(bests) +bests = ConcatenateData(bests, defaults) # Outputs the data as a C++ database PrintData(bests) From 76c91480303dd398b4ff5953a833e493b1409630 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 24 Jan 2016 17:56:27 +0100 Subject: [PATCH 11/50] Minor improvements to the database script, including proper file paths --- 
scripts/database/database.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/scripts/database/database.py b/scripts/database/database.py index 8d8f6c40..cda55fcc 100644 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -64,6 +64,9 @@ def ConcatenateData(df1, df2): def RemoveDuplicates(df): return df.drop_duplicates() +def RemoveEntriesByDevice(df, devicename): + return df[df["device"] != devicename] + # Retrieves the results with the lowest execution times def GetBestResults(df): dfbest = pd.DataFrame() @@ -80,12 +83,12 @@ def CalculateDefaults(df): dfdefault = pd.DataFrame() grouped = df.groupby(DEVICETYPE_ATTRIBUTES + KERNEL_ATTRIBUTES) for name, dfgroup in grouped: - defaultValues = dfgroup.min(axis=0) - defaultValues["device"] = "default" - defaultValues["device_compute_units"] = 0 - defaultValues["device_core_clock"] = 0 - defaultValues["time"] = 0.0 - dfdefault = dfdefault.append(defaultValues, ignore_index=True) + default_values = dfgroup.min(axis=0) + default_values["device"] = "default" + default_values["device_compute_units"] = 0 + default_values["device_core_clock"] = 0 + default_values["time"] = 0.0 + dfdefault = dfdefault.append(default_values, ignore_index=True) return dfdefault # ================================================================================================== @@ -133,12 +136,12 @@ def GetDeviceVendor(vendor, devtype): % (vendor, devtype, devtype, vendor)) # Prints the data to a C++ database -def PrintData(df): +def PrintData(df, outputdir): # Iterates over the kernel families: creates a new file per family for family, dffamily in df.groupby(["kernel_family"]): dffamily = dffamily.dropna(axis=1, how='all') - f = open(family+'.h', 'w+') + f = open(os.path.join(outputdir, family+'.h'), 'w+') f.write(GetHeader(family)) # Loops over the different entries for this family and prints their headers @@ -177,17 +180,16 @@ if len(sys.argv) != 3: # Parses the 
command-line arguments path_json = sys.argv[1] path_clblast = sys.argv[2] -file_db = path_clblast+"/scripts/database/database.db" -glob_json = path_json+"/*.json" +file_db = os.path.join(path_clblast, "scripts", "database", "database.db") +glob_json = os.path.join(path_json, "*.json") # Checks whether the command-line arguments are valid; exists otherwise -clblast_h = path_clblast+"/include/clblast.h" # Not used but just for validation +clblast_h = os.path.join(path_clblast, "include", "clblast.h") # Not used but just for validation if not os.path.isfile(clblast_h): print "[ERROR] The path '"+path_clblast+"' does not point to the root of the CLBlast library" sys.exit() if len(glob.glob(glob_json)) < 1: - print "[ERROR] The path '"+path_json+"' does not contain any JSON files" - sys.exit() + print "## The path '"+path_json+"' does not contain any JSON files" # ================================================================================================== # The main body of the script @@ -222,6 +224,8 @@ defaults = CalculateDefaults(bests) bests = ConcatenateData(bests, defaults) # Outputs the data as a C++ database -PrintData(bests) +path_cpp_database = os.path.join(path_clblast, "include", "internal", "database") +print "## Producing a C++ database in '"+path_cpp_database+"'" +PrintData(bests, ".") # ================================================================================================== From 276e772a2c672ad868ba57e06d55e4991c793207 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 30 Jan 2016 11:43:21 +0100 Subject: [PATCH 12/50] Added first auto-generated database headers from the Python database; only K40 and Iris supported now --- include/internal/database.h | 11 +- include/internal/database/copy.h | 126 ++++++++------------- include/internal/database/pad.h | 126 ++++++++------------- include/internal/database/padtranspose.h | 118 ++++++++------------ include/internal/database/transpose.h | 134 +++++++++-------------- 
include/internal/database/xaxpy.h | 127 +++++++++------------ include/internal/database/xdot.h | 111 +++++++++---------- include/internal/database/xgemm.h | 131 +++++++++------------- include/internal/database/xgemv.h | 119 +++++++------------- scripts/database/database.db | Bin 2093996 -> 2094065 bytes scripts/database/database.py | 42 +++++-- src/database.cc | 8 +- src/routines/level3/xgemm.cc | 2 +- src/routines/level3/xher2k.cc | 2 +- src/routines/level3/xherk.cc | 2 +- src/routines/level3/xsyr2k.cc | 2 +- src/routines/level3/xsyrk.cc | 2 +- 17 files changed, 424 insertions(+), 639 deletions(-) diff --git a/include/internal/database.h b/include/internal/database.h index 1ac0e646..9107f978 100644 --- a/include/internal/database.h +++ b/include/internal/database.h @@ -56,15 +56,8 @@ class Database { static constexpr auto kDeviceTypeAll = "default"; // The OpenCL device vendors - static constexpr auto kDeviceVendorNVIDIA = "NVIDIA Corporation"; - static constexpr auto kDeviceVendorAMD = "Advanced Micro Devices, Inc."; - static constexpr auto kDeviceVendorIntel = "Intel"; static constexpr auto kDeviceVendorAll = "default"; - // The OpenCL device names - static constexpr auto kDefaultDevice = "default"; - - // The database consists of separate database entries, stored together in a vector static const DatabaseEntry XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble; static const DatabaseEntry XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble; @@ -72,8 +65,8 @@ class Database { static const DatabaseEntry XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble; static const DatabaseEntry CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble; static const DatabaseEntry PadSingle, PadDouble, PadComplexSingle, PadComplexDouble; - static const DatabaseEntry TraSingle, TraDouble, TraComplexSingle, TraComplexDouble; - static const DatabaseEntry PadTraSingle, PadTraDouble, PadTraComplexSingle, PadTraComplexDouble; + static const 
DatabaseEntry TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble; + static const DatabaseEntry PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble; static const std::vector database; // The constructor diff --git a/include/internal/database/copy.h b/include/internal/database/copy.h index 541a352b..3bd85fa6 100644 --- a/include/internal/database/copy.h +++ b/include/internal/database/copy.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren +// Database generator // -// This file populates the database with best-found tuning parameters for the Copy kernels. +// This file populates the database with best-found tuning parameters for the 'Copy' kernels. // // ================================================================================================= @@ -16,54 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::CopySingle = { "Copy", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } }, - { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_WPT",2}, {"COPY_VW",4} } }, - { "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_WPT",4}, {"COPY_VW",4} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",4}, {"COPY_VW",2} } }, - } - }, { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",4} } }, + kDeviceTypeGPU, "Intel", { + { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } 
}, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry Database::CopyDouble = { - "Copy", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - { "Tesla K20m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } }, - { "Tesla K40m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",2} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",2}, {"COPY_VW",4} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - } - }, - { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, } }, } @@ -73,26 +40,39 @@ const Database::DatabaseEntry Database::CopyDouble = { const Database::DatabaseEntry Database::CopyComplexSingle = { "Copy", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - { "Tesla K20m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",2}, {"COPY_VW",1} } }, - { "Tesla K40m", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - } - }, { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { 
- { "Iris", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + kDeviceTypeGPU, "Intel", { + { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::CopyDouble = { + "Copy", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, } }, } @@ -102,25 +82,15 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { const Database::DatabaseEntry Database::CopyComplexDouble = { "Copy", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { 
"Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_WPT",4}, {"COPY_VW",2} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_WPT",1}, {"COPY_VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, } diff --git a/include/internal/database/pad.h b/include/internal/database/pad.h index 4a599648..d833a934 100644 --- a/include/internal/database/pad.h +++ b/include/internal/database/pad.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren +// Database generator // -// This file populates the database with best-found tuning parameters for the Pad kernels. +// This file populates the database with best-found tuning parameters for the 'Pad' kernels. 
// // ================================================================================================= @@ -16,54 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::PadSingle = { "Pad", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, - { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - } - }, { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry Database::PadDouble = { - "Pad", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } 
}, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - } - }, - { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } @@ -73,26 +40,39 @@ const Database::DatabaseEntry Database::PadDouble = { const Database::DatabaseEntry Database::PadComplexSingle = { "Pad", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K20m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - } - }, { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, 
{"PAD_WPTY",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::PadDouble = { + "Pad", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, } }, } @@ -102,25 +82,15 @@ const Database::DatabaseEntry Database::PadComplexSingle = { const Database::DatabaseEntry Database::PadComplexDouble = { "Pad", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } diff --git a/include/internal/database/padtranspose.h b/include/internal/database/padtranspose.h index 53226c1d..dacc693f 100644 --- 
a/include/internal/database/padtranspose.h +++ b/include/internal/database/padtranspose.h @@ -5,37 +5,32 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren +// Database generator // -// This file populates the database with best-found tuning parameters for the PadTranspose kernels. +// This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels. // // ================================================================================================= namespace clblast { // ================================================================================================= -const Database::DatabaseEntry Database::PadTraSingle = { - "PadTranspose", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, - { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, - { "Tesla K40m", { {"PADTRA_TILE",32}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PADTRA_TILE",16}, {"PADTRA_WPT",4}, {"PADTRA_PAD",0} } }, - } - }, +const Database::DatabaseEntry Database::PadtransposeSingle = { + "Padtranspose", Precision::kSingle, { { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } }, + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + kDeviceTypeAll, 
"default", { + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, } @@ -43,27 +38,23 @@ const Database::DatabaseEntry Database::PadTraSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::PadTraDouble = { - "PadTranspose", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PADTRA_TILE",8}, {"PADTRA_WPT",4}, {"PADTRA_PAD",0} } }, - } - }, +const Database::DatabaseEntry Database::PadtransposeComplexSingle = { + "Padtranspose", Precision::kComplexSingle, { { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, } @@ -71,28 +62,17 @@ const Database::DatabaseEntry Database::PadTraDouble = { // ================================================================================================= -const Database::DatabaseEntry Database::PadTraComplexSingle = { - "PadTranspose", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, 
kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"PADTRA_TILE",16}, {"PADTRA_WPT",2}, {"PADTRA_PAD",0} } }, +const Database::DatabaseEntry Database::PadtransposeDouble = { + "Padtranspose", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, } @@ -100,27 +80,17 @@ const Database::DatabaseEntry Database::PadTraComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::PadTraComplexDouble = { - "PadTranspose", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K20m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - { "Tesla K40m", { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"PADTRA_TILE",8}, {"PADTRA_WPT",2}, {"PADTRA_PAD",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { +const Database::DatabaseEntry 
Database::PadtransposeComplexDouble = { + "Padtranspose", Precision::kComplexDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"PADTRA_TILE",16}, {"PADTRA_WPT",1}, {"PADTRA_PAD",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, } diff --git a/include/internal/database/transpose.h b/include/internal/database/transpose.h index 1d12a13e..46a38bc2 100644 --- a/include/internal/database/transpose.h +++ b/include/internal/database/transpose.h @@ -5,37 +5,32 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren +// Database generator // -// This file populates the database with best-found tuning parameters for the Transpose kernels. +// This file populates the database with best-found tuning parameters for the 'Transpose' kernels. 
// // ================================================================================================= namespace clblast { // ================================================================================================= -const Database::DatabaseEntry Database::TraSingle = { +const Database::DatabaseEntry Database::TransposeSingle = { "Transpose", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1} } }, - } - }, { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"TRA_DIM",8}, {"TRA_WPT",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + kDeviceTypeGPU, "Intel", { + { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, } @@ -43,56 +38,23 @@ const Database::DatabaseEntry Database::TraSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::TraDouble = { - "Transpose", Precision::kDouble, { - { // 
NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - } - }, - { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry Database::TraComplexSingle = { +const Database::DatabaseEntry Database::TransposeComplexSingle = { "Transpose", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",2}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1} } }, - } - }, { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, + kDeviceTypeGPU, "Intel", { + { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { 
{"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, } @@ -100,27 +62,35 @@ const Database::DatabaseEntry Database::TraComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry Database::TraComplexDouble = { - "Transpose", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { +const Database::DatabaseEntry Database::TransposeDouble = { + "Transpose", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"TRA_DIM",16}, {"TRA_WPT",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::TransposeComplexDouble = { + "Transpose", 
Precision::kComplexDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, } diff --git a/include/internal/database/xaxpy.h b/include/internal/database/xaxpy.h index 058e3c0a..783e142d 100644 --- a/include/internal/database/xaxpy.h +++ b/include/internal/database/xaxpy.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren +// Database generator // -// This file populates the database with best-found tuning parameters for the Xaxpy kernels. +// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels. // // ================================================================================================= @@ -16,26 +16,45 @@ namespace clblast { const Database::DatabaseEntry Database::XaxpySingle = { "Xaxpy", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS",128}, {"WPT",1}, {"VW",2} } }, - { "Tesla K20m", { {"WGS",128}, {"WPT",2}, {"VW",2} } }, - { "Tesla K40m", { {"WGS",128}, {"WPT",1}, {"VW",4} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",2} } }, - } - }, { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS",512}, {"WPT",1}, {"VW",1} } }, + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, } }, { // Default - kDeviceTypeAll, 
kDeviceVendorAll, { - { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XaxpyComplexSingle = { + "Xaxpy", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, } @@ -45,53 +64,15 @@ const Database::DatabaseEntry Database::XaxpySingle = { const Database::DatabaseEntry Database::XaxpyDouble = { "Xaxpy", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS",128}, {"WPT",1}, {"VW",1} } }, - { "Tesla K20m", { {"WGS",512}, {"WPT",1}, {"VW",2} } }, - { "Tesla K40m", { {"WGS",64}, {"WPT",1}, {"VW",2} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, - } - }, - } -}; -// ================================================================================================= - -const Database::DatabaseEntry Database::XaxpyComplexSingle = { - "Xaxpy", Precision::kComplexSingle, { - { // NVIDIA GPUs - 
kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, - { "Tesla K20m", { {"WGS",128}, {"WPT",1}, {"VW",1} } }, - { "Tesla K40m", { {"WGS",128}, {"WPT",2}, {"VW",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, - } - }, - { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, } }, } @@ -101,25 +82,15 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { const Database::DatabaseEntry Database::XaxpyComplexDouble = { "Xaxpy", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS",128}, {"WPT",2}, {"VW",1} } }, - { "Tesla K20m", { {"WGS",256}, {"WPT",1}, {"VW",1} } }, - { "Tesla K40m", { {"WGS",64}, {"WPT",2}, {"VW",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS",64}, {"WPT",1}, {"VW",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS",128}, {"WPT",1}, {"VW",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, } diff --git a/include/internal/database/xdot.h b/include/internal/database/xdot.h index 05841eb7..66a5231e 100644 --- a/include/internal/database/xdot.h +++ b/include/internal/database/xdot.h @@ -5,9 +5,9 @@ // width of 100 characters per line. 
// // Author(s): -// Cedric Nugteren +// Database generator // -// This file populates the database with best-found tuning parameters for the Xdot kernels. +// This file populates the database with best-found tuning parameters for the 'Xdot' kernels. // // ================================================================================================= @@ -16,22 +16,45 @@ namespace clblast { const Database::DatabaseEntry Database::XdotSingle = { "Xdot", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - } - }, { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS1",512}, {"WGS2",512} } }, + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",1}, {"WGS1",512}, {"WGS2",32} } }, + { "default", { {"VW",1}, {"WGS1",512}, {"WGS2",32} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",32} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XdotComplexSingle = { + "Xdot", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, } @@ -41,45 +64,15 @@ const 
Database::DatabaseEntry Database::XdotSingle = { const Database::DatabaseEntry Database::XdotDouble = { "Xdot", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, - } - }, - } -}; -// ================================================================================================= - -const Database::DatabaseEntry Database::XdotComplexSingle = { - "Xdot", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS1",512}, {"WGS2",512} } }, - } - }, - { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + kDeviceTypeAll, "default", { + { "default", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, } }, } @@ -89,21 +82,15 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { const Database::DatabaseEntry Database::XdotComplexDouble = { "Xdot", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WGS2",64} } }, + kDeviceTypeAll, 
"default", { + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, } }, } diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h index 49598c8c..9fbd8fbb 100644 --- a/include/internal/database/xgemm.h +++ b/include/internal/database/xgemm.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren +// Database generator // -// This file populates the database with best-found tuning parameters for the Xgemm kernels. +// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels. // // ================================================================================================= @@ -16,56 +16,21 @@ namespace clblast { const Database::DatabaseEntry Database::XgemmSingle = { "Xgemm", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"MWG",128}, {"NWG",64}, {"KWG",32}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",2}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { "Tesla K20m", { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",4}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { "Tesla K40m", { {"MWG",128}, {"NWG",128}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { kDefaultDevice, { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"MWG",128}, {"NWG",128}, {"KWG",32}, {"MDIMC",16}, {"NDIMC",16}, {"MDIMA",32}, {"NDIMB",8}, {"KWI",2}, {"VWM",4}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - } - }, { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"MWG",64}, 
{"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",8}, {"VWM",4}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } }, + kDeviceTypeGPU, "Intel", { + { "Iris", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry Database::XgemmDouble = { - "Xgemm", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { "Tesla K20m", { {"MWG",64}, {"NWG",128}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",32}, {"NDIMB",32}, {"KWI",8}, {"VWM",2}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - { "Tesla K40m", { {"MWG",64}, {"NWG",64}, {"KWG",16}, 
{"MDIMC",16}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",1} } }, - { kDefaultDevice, { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",16}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"MWG",128}, {"NWG",64}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",1}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - } - }, - { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",1} } }, } }, } @@ -75,27 +40,39 @@ const Database::DatabaseEntry Database::XgemmDouble = { const Database::DatabaseEntry Database::XgemmComplexSingle = { "Xgemm", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - { "Tesla K20m", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",8}, {"NDIMB",8}, {"KWI",8}, {"VWM",2}, {"VWN",2}, {"STRM",1}, {"STRN",0}, {"SA",1}, {"SB",0} } }, - { "Tesla K40m", { {"MWG",32}, {"NWG",64}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",1}, {"VWN",1}, {"STRM",0}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - { kDefaultDevice, { {"MWG",32}, {"NWG",64}, 
{"KWG",16}, {"MDIMC",16}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"MWG",16}, {"NWG",64}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",8}, {"NDIMB",16}, {"KWI",2}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",0} } }, - } - }, { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + kDeviceTypeGPU, "Intel", { + { "Iris", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + } + }, + } +}; 
+ +// ================================================================================================= + +const Database::DatabaseEntry Database::XgemmDouble = { + "Xgemm", Precision::kDouble, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, } }, } @@ -105,29 +82,19 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = { const Database::DatabaseEntry Database::XgemmComplexDouble = { "Xgemm", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"MWG",16}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } }, - { "Tesla K20m", { {"MWG",16}, {"NWG",128}, {"KWG",32}, {"MDIMC",8}, {"NDIMC",32}, {"MDIMA",8}, {"NDIMB",32}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",1}, {"SA",1}, {"SB",0} } }, - { "Tesla K40m", { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",32}, {"KWI",8}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",1} } }, - { kDefaultDevice, { {"MWG",16}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",8}, {"KWI",2}, {"VWM",1}, {"VWN",4}, {"STRM",1}, {"STRN",0}, {"SA",0}, {"SB",0} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { 
{"MWG",128}, {"NWG",32}, {"KWG",16}, {"MDIMC",32}, {"NDIMC",8}, {"MDIMA",32}, {"NDIMB",16}, {"KWI",8}, {"VWM",2}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"MWG",32}, {"NWG",32}, {"KWG",16}, {"MDIMC",8}, {"NDIMC",8}, {"MDIMA",16}, {"NDIMB",16}, {"KWI",1}, {"VWM",1}, {"VWN",1}, {"STRM",1}, {"STRN",1}, {"SA",0}, {"SB",0} } }, + kDeviceTypeAll, "default", { + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, } }; + // ================================================================================================= } // namespace clblast diff --git a/include/internal/database/xgemv.h b/include/internal/database/xgemv.h index c315500f..ae9fbf30 100644 --- a/include/internal/database/xgemv.h +++ b/include/internal/database/xgemv.h @@ -5,9 +5,9 @@ // width of 100 characters per line. // // Author(s): -// Cedric Nugteren +// Database generator // -// This file populates the database with best-found tuning parameters for the Xgemv kernels. +// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels. 
// // ================================================================================================= @@ -16,26 +16,39 @@ namespace clblast { const Database::DatabaseEntry Database::XgemvSingle = { "Xgemv", Precision::kSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"WGS2",256}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",4} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS1",256}, {"WPT1",2}, {"WGS2",64}, {"WPT2",4}, {"VW2",4}, {"WGS3",256}, {"WPT3",2}, {"VW3",8} } }, + kDeviceTypeGPU, "Intel", { + { "Iris", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",8} } }, + { "default", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",8} } }, + } + }, + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + } + }, + } +}; + +// 
================================================================================================= + +const Database::DatabaseEntry Database::XgemvComplexSingle = { + "Xgemv", Precision::kComplexSingle, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, } @@ -45,53 +58,15 @@ const Database::DatabaseEntry Database::XgemvSingle = { const Database::DatabaseEntry Database::XgemvDouble = { "Xgemv", Precision::kDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { + { // NVIDIA Corporation GPUs + kDeviceTypeGPU, "NVIDIA Corporation", { + { "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, } }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } 
}, - } - }, - } -}; -// ================================================================================================= - -const Database::DatabaseEntry Database::XgemvComplexSingle = { - "Xgemv", Precision::kComplexSingle, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // AMD GPUs - kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - { "Iris", { {"WGS1",256}, {"WPT1",1}, {"WGS2",64}, {"WPT2",4}, {"VW2",2}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, } }, } @@ -101,25 +76,9 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = { const Database::DatabaseEntry Database::XgemvComplexDouble = { "Xgemv", Precision::kComplexDouble, { - { // NVIDIA GPUs - kDeviceTypeGPU, kDeviceVendorNVIDIA, { - { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - { "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // AMD GPUs 
- kDeviceTypeGPU, kDeviceVendorAMD, { - { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, kDeviceVendorIntel, { - } - }, { // Default - kDeviceTypeAll, kDeviceVendorAll, { - { kDefaultDevice, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, + kDeviceTypeAll, "default", { + { "default", { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, } }, } diff --git a/scripts/database/database.db b/scripts/database/database.db index 7c8ec445083669dec4e3352236e92b08f7ad141e..bf793177d3a963daf9f82884ba198d9f8d6a722e 100644 GIT binary patch delta 20332 zcmZvk30zcF`^TAKh8;v~y)ai>&dGc{Lm1q2ltK}&H&a!>}imqn})cXKNh^|+;& zsjo_CWt~!U@6FWI%%$butLB!erR}-o|D1bg=AP$X-p}WHp6~BD_nhZB+r5{!&otbs zeQ>LGX}iQ4!8_LGpYJTJuursC*agE_!Dut&S4=Atd~E(ydP%f93k6@BF<+|Sx2#>e z5&qB26ar$$3W2c&W%iPZ_VT!bQn4_usAOVcbwGYe;ne&z$rOT`@Vi{WH9|s|3uc=ig$X1x!D2H~dx{Vt8%`loHcEsjo6p3;s-l8I zA=(x$Tlod0&a%pi!u-mTqKa~%rLDO~7Yp+Xic1To2%Ig@CDLF7t1Wn9t<+3@Rbk1* zQc>XZe7wB8rWA@Lg~dXQEm*elU$Q%kil+&!Y~f{gIfgN61gB~9pH3uWqLlHOB|Au>y7Lx1oy)9{7ZNW^>BU6OuppYtbvo&LDbL3d=n`hdY1m76H^RVhhAKb4Xxq^OiEAr&aO zgKI|${Xy8dgGVL^1JqKYFir;B<#Vk((s%FVTf8v6w(+esXUJ4n~9et{`#+lPiQ=&Bzr>uIA)wL9Q?yGduig za0DB2F72HLJ34mWbQC3ajF0PDB%EjA0t**exWvNOEPR7Dg_x@4 z;8#bPhWpmC(TZt0FE0>-n66PWG|EtolBrR$s6r1_ITLZ1i}4GsP2c;BVLS&^H4m!z zkctnh_y}aQ;Th9jnB{3&1LS>1qnyv)5mC9CsVP$Fc#nFWE!Z0Df{TvDf?;^KaJwAQ35ndAXS)@gItW?j57u6 z*_`84HM^)dUd0J2PK1m@6HJr!FwIf43YghkjgqHPMr)KY8s!D5FloN%Vtg-2mF9F+ z%~}=DQ1MI^&w`A`KW7q9c{fur+C0eg7Tvlb_+B^DzebeQhl{?I6YYLvh9Xw!9b28ugt3d9Hdny&kdWrt^yHykd9ii=b{S;bRS zT#V}nnG$rU_Yh5v<+Liuu|lI%YLqICGF78gdowu>OEcw!f*fnfBa+Q|hKgsZc$SK1 zt9TBoA8k5=z8hmQ(^;10`+xF$r@d%8RM}G|iO9E_G`ysm)v)h0)5Lh>+-@okD~yGsIM2@u&D&*i(jHTdXjh17 zjNG_xx9PC&OJKww6M}a?(J23Fl&2cS%U{(FZuybvPw;pUXccB7iYhKwafOO2A*1Sp 
zroAwknY0E8&GSqbXK9q#8fA_~nX6XN;zOpb?1{LH)?jj3uHrfsuTb$y$Y}LpQ%`i^ zu>2HAIbu4hgPDCy>x0bpYLtB%Wxq!GM6KX)$4u?@wM;5kX%&WlQ1LYtUsv%B72l)` zLcgU^aI?=$1NAUv{{Z!U0U9MxqXcP`V6}o9KPM@(vzb7vFexXhI7!9HD((syJ%5TE z{P(_)_k5~O-o%qmo5rEwGjtPs4*vOPG>4*%wgrZ3lt#(bD0v!Xv_=^dz#NKe&XR+F z9GmnBdIK{ECslk(#b2oSw2IFJ;7x-}@j7JrMw8@4S{EdFNuzwNQNGbA-)fZa0&uIl zriD7x`K&1j7ky{S@yTbiN)3cr_E+%$6%SPLAQcY|^g_Ewo8G0TY=}J3fBZmB>)Y4l z)7teID7Y>?1|G;u;zuK%`YRQ-J!H>x)antrbK4_U!>t~hZgxTboy_zF5mk0FFIBxM zJPc8fobOI zbaH#Ga%g+r?N- zEo(5B2c=6L7FpQtCj*)Ss2GrwC7kuKxuXufvRCf(%wBW8e|lnka(A(7gsXXpo&(ek zP0B9kgMG=VX%(v*U8PWagoUd~~;TbFXE1GECrN-g!{j*U76P%5DQfCjLH?#{4$j_^!*TmolV4!cgDp!TlT zrC|>S6auIjpiq{ud9fwNpbYd_w^KTN#sSI)G#-$hA-r;_<$+;5JFf3gH!C!%ge4xS|%W zsswapH5>YE>h^^G6QDbQ?gF~U5?;H8gbtQ>a0gZBA!?zSS_oAO&7pwrtg}3Eo$M`Q zU13XWB@9qFAQK=nODJ&%I}}#8PnuI6Pz<0}fLgPJ@9wa?9aQVf?DQX~pB1jDh3jhJ zhFZ9(7H(02&YLs#XV`le09puW5un8kVdG(o z5w6w->SptNM=dm}gp3=*|TPC@lsR?mx z4vB!003`$J$`V@Y9pQSW_HgUcK6oBbPe8o@^=9cqLqyLGsCUN*=?!36$B1=0`3;~; z0vl~SpaejPfRb3kRh=WICZk{Ma`XKsL`?M`LDxo_SX3@<>+0%4qW=Ke4rm9Uoq!s; zqJ+OA!fW(qxM*rsqaBP8NG4}%Zu$X z8OiZZ@nSy`j7q!Ig!XkmwCPqvSou(?{o{-p~)|mfTfrJD&9H zP12*#j)nFtbYLNt0ler^)V3}t**9A1be3;4>E_=aG7Wv6((z)M%uY(nH_CKnWAsh^ z5Zd$L;WqJPsVfW$r&#!ch0`pYVE{*Mj$Zy8YWya849(r|+vwwgql~2Sw33R#VzGY( zXf%bToeBX7pnlYI8F>0&VHlaw+K)ZR*o9LO)?DYIQR@3K#Wm zIj30m`|eQuc-M}W^)9{0!g=n)%_F$mKB(=@Xp3H~Ma@sPjMmjkE$J&DZT|Me&k{+Stt(6Q~$*7flw36>kboD7mMzs&OK##6JmMXe6^CFR`RD3J<{_gmR5)XgfPnCZt11-LDKqx8YzHM0rdwofF*n`m$L++ zI|UrwBWU16?w_Gf@!8p=U%EB&CKWoJVn>LbAa;fr2M{kT8!g-%z!u) zVkX2afOys2Iy{k5+o9l^(ezWg|YG?S(Td$zatg z+Tb~^fl{O)S}9E{rE8@Otu)jvDZ3+cju#&O3MU#1JbMUfYpvEwYqZi@t+WnEsNGEN z)hTcxZ1&Xf%*ESUX^U3cs+G2>CH(VDZl`Xz!M$TYqZK@K9MI>0P5?Ry=oC6Un=_%f zIb1pVbT(OTUrB=Qyj)z6i;Hq`Nh?&%LXiq7z~X#8QZ50P)Qw-06VkswdOqxv16OT$qVgRk{8puCrX&GflX>=4q?z z<{`n!g`>1JWDMz+)%(ec-rrRQxXM6R86=lDa4i`~>03!UZ#&V(4et5v3@8pz7eMh0 zVSYVV8K`=aoT3%r!{rODbXqH&(Mo5v(w9^+dJgk*^Stn*ZJa}o3U^3{K0dgE>*y`L 
z7|F+d_xa$49uS|0*b`zeh`mulBNu^JG;)2sQP)QDv>h)gI=fsAqHapp|~rO207@{$@YdphsN~kdX{V z2?xkXhR8)Txd@eu=2C&bJ;=>t_xP;Wn5>=RY>0Cp&V@J+AU^kBvd3#x8@a^-(!NRy zYeEZeLNhg?nVZloRBJ{?J4dwR-siaQ4Cux=vJBJF)pO*fFGDVd%0;GJWJv{{dx={k zZ&hWc?$YM`!Q|c3Np_NW+r88RTtVY6ze&?Jz^wlb^IqLT(H;I0!ZTpkE z?WbNXFD}xiKeh>NToYP;6WaJDG&|K=$kY2lUu&99`iRh#pes+w-YJ!fGPw}sLX-;p zz0Nw%hdD#9(U#mNhEu!_@dm`35N|>J5iRqxM&Mw7>uCe}AkZ4Fi$?DTS|fBV<${w7 zt6cC>f#O4~Z9L&riES1$;Z%ujA-03q9%2WGu~G;4Nr)$$7v!E_l#8))F-|V>r2@x< zThrR2Dc!Bom-wgMtl^pKpacBTZYxoH{F{wQ)ZtiZl^vs>|kLh3k@v1!$Kn(k!>~M^V!y?I%LbX zHrIV7DZ1lw@wr@_kc*R2f&1C4Pkp2phYHbNQeIYCUMO~4Mk1j*Q;|AprOsL@PAhfM zO7T>pYq*-81!BT7FTC|dYoxCzrZ@4Nk84=l|~~2;!ud05VHW{SqrT@rV4?nXfMfHnboizVFY4Xf*` zt6EyAVJn$*K<(54QLCXt$?<%gl@eFhIbcrD#N=Q z;=2&vgZMr`ymA8=-mj?FGrZ>kT>x|u&?S~|UA?urzq}>NSGhx#wnPQQN{Ceurvk*k zZMU}2Bi{!3%&|9ES9p7Nh*P}msVvRt6^K_M{s8eBKy2S_U96Y>{-vD%zKG-j9|?Y1 z$zLl4Xr(}{6ogXVx0=wb_pN`Sf$x(iSEMB9qU0i4E?UY3Cl&bk9;@r`5z!QL#zCOMmI*4IFLG(l{N_CV2DE?rUAqWm>j#Fhy5z*bRWBjh1D#q zVPP!`>*PULw~s#Tr6JoRkIn~j@u6H`x%fyb@GqZ`w@GHvJhPffkNkNSVn>LbAa(|b zj~)a2{$uL(?EAfd_5s=t=o6Ok;ZxTA-l~_A2ebxUiN9*4-?Y;2TImn1^e2_v6Z01Z z{PkJuWna$&vu+JjVFe2-Sy;saVgMVzAtN=^Gd6A3lJ-2*7En7t?E!UA2;aV99cRc^ zUa88dPr(X^l@O~SPGu0)-LP&#&)u|2|3Biwo7Rm!(qAH_w_fjcY~Th!MnFD*d;$5P z5%KX8xieA_*;^G%YVGs}fVE+XV2QYvu3FK`(zq%FF4sZ(48aWTXt5MKv~AOE3U zaYtwsxZ;j#rDIy@Q?2x0t@Ig{+|gHw$Jcow{-L$GXPZ`uCt!_}5Klq;0^(_ac>P0d ze){#A{Jgc2K`R-xl8;vMrII^8r|7rd3m^Q~db$mIqctymw%u;c*U6u4^4rLv4d8~u z5RX7S3h@{~e4qn=z}QbuUiXs{#eN$}yDmG0u~QklKVuJY+xU2Aevgl|Ws?#~Lx-q? zZdr*A13CieD4=5s;r+>cjJG>7zj`Gyf5r}A>_Em2a@#nzJO8$~64`L-@I*EOP&S~E zfN~TZKJPy@zqLPB~~0c8N0%5;}z+A zo=zFF*Qvu3*&BeC0$K)Wxk9+pP`;OEXL`S-L}p-YBV+q8wy)d9dq(iB4Cs=L%qjT; zU&m4W7ADSWTFn#ZG(ax{dIiv{3Zc+EWjDM=b-kB#m@HuIg^az3u@}2-^nZE$3T6CO zQ{5BS8bE6Stpl`PA-sDuU+N3fb!NnVqz+GHw*mbG z=nkN}3gJUznaDynE0Hy4>=uk2#@OL*8#m;W$fOTR>6GnG9iGT~0D2x!Pe8p`Lg(y! 
zmb4w7OBM%EkDd$~u?I5tAjTfd*hAblE}p=*Hb|j8mrQP6NgXsaiB2Nc5lEBR1gDK^r?s4Ys4xs091*rOSHjAG-c8a~7)T^j2X zR5rLr{v@DNfW829nk6jG=FNt5X_ZB7WfP4C)DjQ}$jTBvH=kdkL-qyoq4dE5{)(#3L!s&o+(`!Hi4VeF%feN3_OM@xB8*N3c*>u!63>^qEom$C0DHlE(VzwS*})$`k3 zjS#yhWA|e0-i+Nxv2prtzE6PrjE&l%L=(-}Eg73*Y^&Qw=lAk`k!hd2g^Kp^Mqhc$ z@1Qk2TYe{?20-rsYGet0bcAnIj<@}8pEMz#06GBZAfQ7E;jKscE55a4yoz=zV>g+x zr!aOgV>=nU#BGy_EM@F6#ugY`WbATc^X{3iU>uc-jpI-8N0^u2?bJmFQ%ZaXpq+pk z0KKCSPCUaGv_$9*Fa0OXm+$b8y=&bk(xgUZEF6qo#MqM=dkSM06Px@HQsG$9Nvv2$ zkR!N8ERjs`3rJ!q)#&dT&2^C93lhs3z4TtODCRyN9oWSY??`YYI+7g8XwZG0uN~+Z zEIkz2k(e+v~px9F-;>C3v0xeNn!Jek4s37OAucnP$Rxd zo3I(|)rI9^4Y3`8HlGRfXV%Ka=>*4$wX{NwI76!E=bR+w6K}aVvyms2nV(t`XVDt+ z&}A>k8KSFe9twwoJ~9qTV;(nhgO+9ll)tdpFg3psJNo2BtJi(yqNq3T!1*2 z=ox%Q4EJmQU~f zRJ--3+IQQ`AK50rr~Ne17&BTl&7avK#zhr!Vd0auMR2WxOGQO}nFjfA)=vO29 z$2g4R^DFYjfcdA|^b9O0sdSZ12_*d`DSt|!7?iG?>Clgxoh3GkN!O=8JJa1^e8rTq z!ayME1!Do_7`-*MArBv%Dwgs1Fl1tk;%*;cFrS#9vO35QejB!}_hV0ZK z1H|@q&3(d0KLZ9zu29ky+mJj;?7+z+D>-7vhU8IVT-|&hYicJ)FcZP^DPlYn`icpT z#%z!5tPC+xD`kpFb@zPC)?_eWo>6wL6O0kHUAQ(nXU~Gik-6;(vNvL6qS#HNB#F;y zlnk-EM#&U=0A=K$+ULcdAncKYo=*^aX{AK5w^m9L`@nmVuv1{CVc7{{U#*lV_G6@^ z@;EZyB(c9EpiC+(C@L>1EfELg7|Cv_C~_5w0}Ct0IgBM_Z-|33D`#bigPp#06HNAJ zQftJt7-xVp(5Y5}bW#a2HZ`8oNC>$+j#(;~4((=)sTsDhd z_LAqaVRbu95!17#&(4}oYRGg0`S_5Jk$g<#<4Zn%ubu!y*9pz~%GowDE;jTDqe{5bk0ggYmQGM!u-^ zaZ4&XNXimA*V8-(P3~nbmbEd=Q18?GG5m*yA87bb4gaO#hXx&9+}nK57n6`$=;fG3 zPw>$W-pMINoHCJ9CUHu!j}G|`HoLfSmD8@wxGFSUso^ROPu6g?j}Es@HRm^>sSZQ$ zm6+$EA41G6lmXU~!x7HzR8E=3DX(+NbWWLJ)S>mc<~H=P-zzhGIhV|E=!@plel*7i z4Vc7{tYjMfRzTWPoaST`yTOOv-9?V*LUBlBgJVV1XA)~T`=8bSH z^Jxz-mIa(r$0-XrWsz1vWrxh&>11v>M9O%?Ve=7C#y_R~nUU?$@Lmn?)9`1Iao?k) z@WG?&D(wP>c8ybh`G#g1`O3JRH>bfIv9RKn(|JxRHi~A>#$dxM3#H zE?}66oRY*T$(+)eQ@U`PnvUR1?cPE`}0qd9i4F6UF4O*zvVR1hM#Gm@H^;D zn5iG3;gK57(eNk@=LYI<*;!5DKTbP=teoJKlbmvjQ%-ZrnLr)7^0nDT6Yz#Uj~&S+ z+L;;IWetC=;cqnje;WRljI7VM=6qkIzh*u;x^IwnO#L{eKc@`fl!2TwC`gC)Ujw!N z-ZfHOJ6;F1{&j=*_z6);aKJC5SIT`@;DtwI>hC-P1zvbeI^KC~e$a{5gdbup#VDhL 
z)_0sC6!Rz@V|DD-G*+rlsH|((>K}%g@pD(X;1SvhjEGET;t; zKs6Hp`naBSoV?Xi(26k{6IB z=f86}JKipUo(0quP&bya^QgscL`QBE%0j&$y;oK9Jzk`)zuZv)ly&lj8 zKpO#VVhOGI#xjb|>l@6xrf&Cik>lL~XeXc#0qp|xQG^bk{nql>Naymz6*8ACuUaG@ zIYX(ANH+QrfJOq!0W=CwZln&y-LSM$*2id1n;iBSKzV@10?KCzFS}v6*p9A`dNd{2 znuZHPto1&02*J&)*|?yY)fA#kz+4+Ugf{`r1N0W4w^>3Twy?HU!e8Lg_WMJU>#wk`uZmC&edA%UYADSz)|PIyoZRofvr_BLU?A8U-kqA>6paI^A7X`m|vaodT#Ypnib*vxNRT)q0HPWy3U* zm&enrhul@AQCl|JU_c>&8Ut#=684>EZD9n79!=ZXM32!5d0JttR>+3}E`Qg0&zNbX zul%&$en6YOv<=X9Ksx~KWCH zO&P-9F1P;cF67N(*i?i93Ik*YWMK)ls@J5#Mw|VZop09)Ewn;Qt8n@DB?`r%2&^AEZ0qtN34L;&2Py(PtKuIj&mpX<|PNqlCclqS@N#nvN2XrIfuyoml<;D3e(v#y|(x%Sj zK^pD-2DHtc(V@NJp=}#z+QMu2?@8f1d!v4j!keK_w}d-U>s#SXaq`x1l%a^<{zJrm z`-wQ>kyre8P=pf|Tqk1n;E02UzU1edy5xB2QXdkc+rP~CUo-wUjQ@YCkL%MTHgupF z7+M`M7%#1ksPylZ&{@hH;JzjChcf;!#vji3S*nkROpCbDhTbw^6S?Ipns zE)d#wA@UtV|3SkXQuJ8&Ln*Xjp)CvTScqW&PrDqsN&ZqC;}?~QtNo&shu-f-sPA)b z+E+}dk;_f9RuQ_iD(a>o+oaR!QcEfdi=`8#BoGx(LOccWG{iFiv2k70f-ZFAi@%A= z#rMC7`Z5UhzZGTG_d|VdMMdfRD`tRV1}bKdVg}0w$KH&3;y`s)ArkfO(fnQ1?^aYe zzTKnw>S9G)v)Lo2*Ly`DtDy8WB-WyoTTzktm{mCCD{tC` z(aIapDr(Waw}jBP4K!8p8t!Tr-VUt|Aom@XKi>1GspL^mD(9sNUaI7!Dqfn*NjN=8 zNC-q1o(fijVH!I3RERYAxUJD`O>XP!wtkAmre4Av1G^U@Gr z%HX9;UK+|tc)|ohG@pI$K^laAk zfHnZy2xt>S`1@(X7Cq`bgRHgxpoAG@t$nGOV~RPhm=lUQDI4_bbfJ8R(evc<%>62) z3xF;Hx&-L5O8C1O!dAU}nkq%&A#GD@#@`VCf%pjGV}SVbEMc~9t>(Nco=NVn(OeB^ zB@Jk$4QOQzXdGwkOFo&ATHy&~_+wcmU&S_$7s3F^Lvu^FnP~}QxH!> zJOlA8K>XDv;S~ct6EfDrVgTzrp0hC51q246zBsrU3Dg0}a%L*J&S3 zU6|g0HlqP;W&_%+2DI6XhPQk{>%!+>k@eaP9r}u_*HFcTDaNcAi(;&@!GE6 zvzIo9o6vM#8p2B%yp+jEc-UoObr7mJEi@TUWleJKHlY35fc9Gh+V6AF8YG13=b)w! 
zg-|U0DC`c@yb-@T*F7rrG_T>M*LbOxm!|U4G)}_Xf2A*94^EKb(#AkiqnPhGry9^s zH=vzqKs!4ZwOmBT`St^0lKhLlfBqDzars}uZGZU-hyF!$_9>}7qZKnoF?os^D;s>~ zDJeY6ekgg9#OSFJE~yk^8AK7H1Q36rx6SrNra)Vm{zs$_w1w-hE9Qn`ZYt&{+2FPQ zwx5jNK&qsuc}yTxQges`L>ojqKok*Ti$;rrNF1%v{2&rXv|`#QrmbSy$p$Y7VdHQ~ zFR=G=NiRYi4RH*_Jb*YL%$6qqD){-Xwp_futL+)(cMt{StFRRD4jHeJEDo6>0Yw37 z4oF}LckF5VFj)QpN?HDRDdJsnPakqm3Xv>CvCy0afdSkx-RAFyKF_i>)o(_JvTSC3 zy<)Z~W~*X8P|P;j;DuSXr}}~b^4JO~zofjZw7gLI54|gtzT~B2ymXwGPIx5j@2~~< z$!n&XzII5l^WAGk!442RLX3mh2_UX{(bmCFK9H;F_t-78mClY#TLFClXd9sIEa86( zY>$+~r<%OVyQH6JE8T1g-hy}=;?EH80K@|(*tP}HmzF;yvbKhn+s5h1H`#Lwm>Uv0{jo$T;4rmCV3_zJI zVQH?-{engpK$TQNTRrQcN-Bj|22q440Ynq$vEjZ<8@=Ja0_ashHGp1Y3ESRc!*xk3 zX{#q(m$VAvYKUtft_6rK?~t=XPOn`4LKnH?l_?C68IT2#l_h*{k!^vyJjm%?M_WDd zR!Qq2Zh*KE;wFIT+|sZ(B&sN#AN)=p(;vsb&8MdI2o^ zS9s|vFJ0rMA9?9IC*grRY*P&A<6Y#$@h|7+5;}mJ-oD+m-g|}XS}qZlkmoqw#`1? znY%|jcxLWC3xBZifQ3I9z4Xc3^rfDlXg&#y_QvedHEdKtN^ ziqR0;Kx_-K9fSD%726o2{6juD+H%@PYo<&UfGPo10h+85ntsE!hQ9Me-?U}pmv7ou z`J&J7lAZ5^4&5a?->4XqVtf_jrx<_Ppx5r&9x}_=yxcQbc~x5=S|Nr*i~xxL_?0Z< zVz|8K(k@VXlb7c4(p$XrHYefJf7Kx1bC`DVY|1L>2*jfhKZp1QK=ju?wrI^@ zj?*4+FeiBFBrl!frPG{*OCGUwP?t4WkY2$c8h17UMt9_}LyG`S-9-RRbGKb#Kb@0gVGx0BF2Q=*u4VYEN7xRCmW! 
z%J^lBFEYO5@$tN#_IQID*jn1)4Qw5t^?)`2+NctK(%YVG@I>|%ZE#0+p7Ad*{zb;W z;#$Vv^am)~V4`yfjtYsrJ0x|*e z1>~m^UO(L4QvSzwpF8Z?IBJA_gE!7<+RdPoYseJFf0^-LVfnQtbW@Zo425(@8038N&1kh2H z@Vpo7*S#6LMH>uCWVadrXU4z7_`i63yl*s!tjRhyvZjEV0SW~arV?I}MZ8;7_Dp#@bV(*x_opp1X)+A}G!W1rK!a7nFOIXf@*Y)- zXafz6oHdIXA2I$C#$T%X_~c9W!=_#&D)V}Ed=|#HGJZJYN2oqJS!~agzctARTua(w zl*1tbwF1-{P&A-6Ea4Yj_9p(G1-pkfxktE{@%J(QXNaRek*OR{PsNbh$mZS&gJSZ@-Ob~Pkbhk zzadmOTAHv;r;Bm=I{j)Sr|g119wZe}js7X1nNISjcBF}Nm*!5vX>;10Eu1Z#t(>i$ z(atu`w$66W7-xIuGtO9N2WLlToHO2;;7oKTIg_28(QCik?X~o;7CGg=I3!IX_fZ~N z+`vO!?uY7?hnCRJo$|dGmwGQQYv5wheQ~gIu_SkC;%w@yZRQMhhB?hni__{1cSblP zol(vKYE0#%XJL(0L5#yUJ}x0SE(3-Jafp{?#^VDuIb|zu~0cG^HQ;zxo)`>2)g8Kan)k hIb@E*S$E1bHlj1B^7OxoamwXcm4%h%730Rm{Xaqp;Wz*Q diff --git a/scripts/database/database.py b/scripts/database/database.py index cda55fcc..01662a4b 100644 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -20,6 +20,11 @@ import json import pandas as pd # Constants +VENDOR_DEFAULT = "default" +DEVICETYPE_DEFAULT = "All" +DEVICENAME_DEFAULT = "default" + +# Attributes DEVICETYPE_ATTRIBUTES = ["device_vendor", "device_type"] DEVICE_ATTRIBUTES = ["device", "device_core_clock", "device_compute_units"] KERNEL_ATTRIBUTES = ["precision", "kernel_family", @@ -67,13 +72,17 @@ def RemoveDuplicates(df): def RemoveEntriesByDevice(df, devicename): return df[df["device"] != devicename] +def GetEntriesByField(df, field, value): + return df[df[field] == value] + # Retrieves the results with the lowest execution times def GetBestResults(df): dfbest = pd.DataFrame() grouped = df.groupby(ATTRIBUTES+["kernel"]) for name, dfgroup in grouped: - bestcase = dfgroup.loc[[dfgroup["time"].idxmin()]] - dfbest = ConcatenateData(dfbest, bestcase) + besttime = dfgroup["time"].min() + bestcase = dfgroup[dfgroup["time"] == besttime].iloc[0] + dfbest = dfbest.append(bestcase, ignore_index=True) return dfbest # Sets defaults for devices of the same type/vendor based on the smallest values of all know @@ -81,14 
+90,30 @@ def GetBestResults(df): # on other devices. def CalculateDefaults(df): dfdefault = pd.DataFrame() - grouped = df.groupby(DEVICETYPE_ATTRIBUTES + KERNEL_ATTRIBUTES) - for name, dfgroup in grouped: + + # Defaults per type/vendor + groups = df.groupby(DEVICETYPE_ATTRIBUTES+KERNEL_ATTRIBUTES+["kernel"]) + for name, dfgroup in groups: default_values = dfgroup.min(axis=0) - default_values["device"] = "default" + default_values["device"] = DEVICENAME_DEFAULT default_values["device_compute_units"] = 0 default_values["device_core_clock"] = 0 default_values["time"] = 0.0 dfdefault = dfdefault.append(default_values, ignore_index=True) + + # Defaults in general + groups = df.groupby(KERNEL_ATTRIBUTES+["kernel"]) + for name, dfgroup in groups: + default_values = dfgroup.min(axis=0) + default_values["device_vendor"] = VENDOR_DEFAULT + default_values["device_type"] = DEVICETYPE_DEFAULT + default_values["device"] = DEVICENAME_DEFAULT + default_values["device_compute_units"] = 0 + default_values["device_core_clock"] = 0 + default_values["time"] = 0.0 + dfdefault = dfdefault.append(default_values, ignore_index=True) + + # Database with both types of defaults only return dfdefault # ================================================================================================== @@ -132,8 +157,9 @@ def GetPrecision(family, precision): # The C++ device type and vendor def GetDeviceVendor(vendor, devtype): - return(" { // %s %ss\n kDeviceType%s, \"%s\", {\n" - % (vendor, devtype, devtype, vendor)) + if vendor == VENDOR_DEFAULT and devtype == DEVICETYPE_DEFAULT: + return(" { // Default\n kDeviceType%s, \"%s\", {\n" % (devtype, vendor)) + return(" { // %s %ss\n kDeviceType%s, \"%s\", {\n" % (vendor, devtype, devtype, vendor)) # Prints the data to a C++ database def PrintData(df, outputdir): @@ -226,6 +252,6 @@ bests = ConcatenateData(bests, defaults) # Outputs the data as a C++ database path_cpp_database = os.path.join(path_clblast, "include", "internal", "database") print "## 
Producing a C++ database in '"+path_cpp_database+"'" -PrintData(bests, ".") +PrintData(bests, path_cpp_database) # ================================================================================================== diff --git a/src/database.cc b/src/database.cc index b7275dad..7f5ac6eb 100644 --- a/src/database.cc +++ b/src/database.cc @@ -34,8 +34,8 @@ const std::vector Database::database = { XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble, - TraSingle, TraDouble, TraComplexSingle, TraComplexDouble, - PadTraSingle, PadTraDouble, PadTraComplexSingle, PadTraComplexDouble + TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble, + PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble }; // ================================================================================================= @@ -78,6 +78,7 @@ Database::Parameters Database::Search(const std::string &this_kernel, const std::string &this_device, const Precision this_precision) const { for (auto &db: database) { + if (db.kernel == this_kernel && db.precision == this_precision) { // Searches for the right vendor and device type, or selects the default if unavailable. This @@ -89,7 +90,8 @@ Database::Parameters Database::Search(const std::string &this_kernel, // Searches for the right device. If the current device is unavailable, selects the vendor // default parameters. This assumes the default is last in the database. 
for (auto &device: vendor.devices) { - if (device.name == this_device || device.name == kDefaultDevice) { + + if (device.name == this_device || device.name == "default") { // Sets the parameters accordingly return device.parameters; diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc index 94aadcad..3961a3fd 100644 --- a/src/routines/level3/xgemm.cc +++ b/src/routines/level3/xgemm.cc @@ -30,7 +30,7 @@ template <> const Precision Xgemm::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template Xgemm::Xgemm(Queue &queue, Event &event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/level3/copy.opencl" #include "../../kernels/level3/pad.opencl" diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc index 5b84decb..e9970fd1 100644 --- a/src/routines/level3/xher2k.cc +++ b/src/routines/level3/xher2k.cc @@ -28,7 +28,7 @@ template <> const Precision Xher2k::precision_ = Precision::kCom // Constructor: forwards to base class constructor template Xher2k::Xher2k(Queue &queue, Event &event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/level3/copy.opencl" #include "../../kernels/level3/pad.opencl" diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc index 6a915c0b..49fd12af 100644 --- a/src/routines/level3/xherk.cc +++ b/src/routines/level3/xherk.cc @@ -28,7 +28,7 @@ template <> const Precision Xherk::precision_ = Precision::kComp // Constructor: forwards to base class constructor template Xherk::Xherk(Queue &queue, Event &event, const std::string &name): - 
Routine(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/level3/copy.opencl" #include "../../kernels/level3/pad.opencl" diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc index de5f1afc..966a000f 100644 --- a/src/routines/level3/xsyr2k.cc +++ b/src/routines/level3/xsyr2k.cc @@ -30,7 +30,7 @@ template <> const Precision Xsyr2k::precision_ = Precision::kComplexDou // Constructor: forwards to base class constructor template Xsyr2k::Xsyr2k(Queue &queue, Event &event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/level3/copy.opencl" #include "../../kernels/level3/pad.opencl" diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc index d8fc6357..630cb731 100644 --- a/src/routines/level3/xsyrk.cc +++ b/src/routines/level3/xsyrk.cc @@ -30,7 +30,7 @@ template <> const Precision Xsyrk::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template Xsyrk::Xsyrk(Queue &queue, Event &event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/level3/copy.opencl" #include "../../kernels/level3/pad.opencl" From 310d05d187b4b36413477e054d8f8dbc032dde1c Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 30 Jan 2016 11:52:21 +0100 Subject: [PATCH 13/50] Updated to version 4.0 of the CLCudaAPI header --- include/internal/clpp11.h | 115 ++++++++++++++++++++++++++------------ 1 file changed, 78 insertions(+), 37 deletions(-) 
diff --git a/include/internal/clpp11.h b/include/internal/clpp11.h index df7a0d82..104a6436 100644 --- a/include/internal/clpp11.h +++ b/include/internal/clpp11.h @@ -76,7 +76,7 @@ class Event { explicit Event(const cl_event event): event_(event) { } // Regular constructor - explicit Event() { } + explicit Event(): event_(nullptr) { } // Retrieves the elapsed time of the last recorded event. Note that no error checking is done on // the 'clGetEventProfilingInfo' function, since there is a bug in Apple's OpenCL implementation: @@ -119,6 +119,13 @@ class Platform { platform_ = platforms[platform_id]; } + // Returns the number of devices on this platform + size_t NumDevices() const { + auto result = cl_uint{0}; + CheckError(clGetDeviceIDs(platform_, CL_DEVICE_TYPE_ALL, 0, nullptr, &result)); + return static_cast(result); + } + // Accessor to the private data-member const cl_platform_id& operator()() const { return platform_; } private: @@ -136,11 +143,11 @@ class Device { // Initialize the device. Note that this constructor can throw exceptions! 
explicit Device(const Platform &platform, const size_t device_id) { - auto num_devices = cl_uint{0}; - CheckError(clGetDeviceIDs(platform(), CL_DEVICE_TYPE_ALL, 0, nullptr, &num_devices)); + auto num_devices = platform.NumDevices(); if (num_devices == 0) { Error("no devices found"); } auto devices = std::vector(num_devices); - CheckError(clGetDeviceIDs(platform(), CL_DEVICE_TYPE_ALL, num_devices, devices.data(), nullptr)); + CheckError(clGetDeviceIDs(platform(), CL_DEVICE_TYPE_ALL, static_cast(num_devices), + devices.data(), nullptr)); if (device_id >= num_devices) { Error("invalid device ID "+std::to_string(device_id)); } device_ = devices[device_id]; } @@ -172,6 +179,7 @@ class Device { size_t CoreClock() const { return GetInfo(CL_DEVICE_MAX_CLOCK_FREQUENCY); } size_t ComputeUnits() const { return GetInfo(CL_DEVICE_MAX_COMPUTE_UNITS); } size_t MemorySize() const { return GetInfo(CL_DEVICE_GLOBAL_MEM_SIZE); } + size_t MaxAllocSize() const { return GetInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE); } size_t MemoryClock() const { return 0; } // Not exposed in OpenCL size_t MemoryBusWidth() const { return 0; } // Not exposed in OpenCL @@ -225,7 +233,7 @@ class Device { auto result = std::string{}; result.resize(bytes); CheckError(clGetDeviceInfo(device_, info, bytes, &result[0], nullptr)); - return std::string{result.c_str()}; + return std::string{result.c_str()}; // Removes any trailing '\0'-characters } }; @@ -342,7 +350,12 @@ class Queue { queue_(new cl_command_queue, [](cl_command_queue* s) { CheckError(clReleaseCommandQueue(*s)); delete s; }) { auto status = CL_SUCCESS; - *queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status); + #ifdef CL_VERSION_2_0 + cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; + *queue_ = clCreateCommandQueueWithProperties(context(), device(), properties, &status); + #else + *queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status); + #endif 
CheckError(status); } @@ -408,7 +421,7 @@ class BufferHost { // ================================================================================================= // Enumeration of buffer access types -enum class BufferAccess { kReadOnly, kWriteOnly, kReadWrite }; +enum class BufferAccess { kReadOnly, kWriteOnly, kReadWrite, kNotOwned }; // C++11 version of 'cl_mem' template @@ -418,13 +431,17 @@ class Buffer { // Constructor based on the regular OpenCL data-type: memory management is handled elsewhere explicit Buffer(const cl_mem buffer): buffer_(new cl_mem), - access_(BufferAccess::kReadWrite) { + access_(BufferAccess::kNotOwned) { *buffer_ = buffer; } - // Regular constructor with memory management + // Regular constructor with memory management. If this class does not own the buffer object, then + // the memory will not be freed automatically afterwards. explicit Buffer(const Context &context, const BufferAccess access, const size_t size): - buffer_(new cl_mem, [](cl_mem* m) { CheckError(clReleaseMemObject(*m)); delete m; }), + buffer_(new cl_mem, [access](cl_mem* m) { + if (access != BufferAccess::kNotOwned) { CheckError(clReleaseMemObject(*m)); } + delete m; + }), access_(access) { auto flags = cl_mem_flags{CL_MEM_READ_WRITE}; if (access_ == BufferAccess::kReadOnly) { flags = CL_MEM_READ_ONLY; } @@ -439,57 +456,74 @@ class Buffer { Buffer(context, BufferAccess::kReadWrite, size) { } + // Constructs a new buffer based on an existing host-container + template + explicit Buffer(const Context &context, const Queue &queue, Iterator start, Iterator end): + Buffer(context, BufferAccess::kReadWrite, static_cast(end - start)) { + auto size = static_cast(end - start); + auto pointer = &*start; + CheckError(clEnqueueWriteBuffer(queue(), *buffer_, CL_FALSE, 0, size*sizeof(T), pointer, 0, + nullptr, nullptr)); + queue.Finish(); + } + // Copies from device to host: reading the device buffer a-synchronously - void ReadAsync(const Queue &queue, const size_t size, T* host) { + 
void ReadAsync(const Queue &queue, const size_t size, T* host, const size_t offset = 0) { if (access_ == BufferAccess::kWriteOnly) { Error("reading from a write-only buffer"); } - CheckError(clEnqueueReadBuffer(queue(), *buffer_, CL_FALSE, 0, size*sizeof(T), host, 0, - nullptr, nullptr)); + CheckError(clEnqueueReadBuffer(queue(), *buffer_, CL_FALSE, offset*sizeof(T), size*sizeof(T), + host, 0, nullptr, nullptr)); } - void ReadAsync(const Queue &queue, const size_t size, std::vector &host) { + void ReadAsync(const Queue &queue, const size_t size, std::vector &host, + const size_t offset = 0) { if (host.size() < size) { Error("target host buffer is too small"); } - ReadAsync(queue, size, host.data()); + ReadAsync(queue, size, host.data(), offset); } - void ReadAsync(const Queue &queue, const size_t size, BufferHost &host) { + void ReadAsync(const Queue &queue, const size_t size, BufferHost &host, + const size_t offset = 0) { if (host.size() < size) { Error("target host buffer is too small"); } - ReadAsync(queue, size, host.data()); + ReadAsync(queue, size, host.data(), offset); } // Copies from device to host: reading the device buffer - void Read(const Queue &queue, const size_t size, T* host) { - ReadAsync(queue, size, host); + void Read(const Queue &queue, const size_t size, T* host, const size_t offset = 0) { + ReadAsync(queue, size, host, offset); queue.Finish(); } - void Read(const Queue &queue, const size_t size, std::vector &host) { - Read(queue, size, host.data()); + void Read(const Queue &queue, const size_t size, std::vector &host, const size_t offset = 0) { + Read(queue, size, host.data(), offset); } - void Read(const Queue &queue, const size_t size, BufferHost &host) { - Read(queue, size, host.data()); + void Read(const Queue &queue, const size_t size, BufferHost &host, const size_t offset = 0) { + Read(queue, size, host.data(), offset); } // Copies from host to device: writing the device buffer a-synchronously - void WriteAsync(const Queue &queue, const 
size_t size, const T* host) { + void WriteAsync(const Queue &queue, const size_t size, const T* host, const size_t offset = 0) { if (access_ == BufferAccess::kReadOnly) { Error("writing to a read-only buffer"); } - if (GetSize() < size*sizeof(T)) { Error("target device buffer is too small"); } - CheckError(clEnqueueWriteBuffer(queue(), *buffer_, CL_FALSE, 0, size*sizeof(T), host, 0, - nullptr, nullptr)); + if (GetSize() < (offset+size)*sizeof(T)) { Error("target device buffer is too small"); } + CheckError(clEnqueueWriteBuffer(queue(), *buffer_, CL_FALSE, offset*sizeof(T), size*sizeof(T), + host, 0, nullptr, nullptr)); } - void WriteAsync(const Queue &queue, const size_t size, const std::vector &host) { - WriteAsync(queue, size, host.data()); + void WriteAsync(const Queue &queue, const size_t size, const std::vector &host, + const size_t offset = 0) { + WriteAsync(queue, size, host.data(), offset); } - void WriteAsync(const Queue &queue, const size_t size, const BufferHost &host) { - WriteAsync(queue, size, host.data()); + void WriteAsync(const Queue &queue, const size_t size, const BufferHost &host, + const size_t offset = 0) { + WriteAsync(queue, size, host.data(), offset); } // Copies from host to device: writing the device buffer - void Write(const Queue &queue, const size_t size, const T* host) { - WriteAsync(queue, size, host); + void Write(const Queue &queue, const size_t size, const T* host, const size_t offset = 0) { + WriteAsync(queue, size, host, offset); queue.Finish(); } - void Write(const Queue &queue, const size_t size, const std::vector &host) { - Write(queue, size, host.data()); + void Write(const Queue &queue, const size_t size, const std::vector &host, + const size_t offset = 0) { + Write(queue, size, host.data(), offset); } - void Write(const Queue &queue, const size_t size, const BufferHost &host) { - Write(queue, size, host.data()); + void Write(const Queue &queue, const size_t size, const BufferHost &host, + const size_t offset = 0) { + 
Write(queue, size, host.data(), offset); } // Copies the contents of this buffer into another device buffer @@ -573,6 +607,13 @@ class Kernel { 0, nullptr, &(event()))); } + // As above, but with the default local workgroup size + void Launch(const Queue &queue, const std::vector &global, Event &event) { + CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast(global.size()), + nullptr, global.data(), nullptr, + 0, nullptr, &(event()))); + } + // Accessor to the private data-member const cl_kernel& operator()() const { return *kernel_; } private: From f573fe6bb3ac68dffbcca9da4c17c8bf0af178cf Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 30 Jan 2016 11:53:54 +0100 Subject: [PATCH 14/50] Fixed a bug in the graph scripts (thanks to Victor Pakhomov) --- test/performance/graphs/common.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/performance/graphs/common.r b/test/performance/graphs/common.r index 34a59c43..5b3e6e52 100644 --- a/test/performance/graphs/common.r +++ b/test/performance/graphs/common.r @@ -63,7 +63,7 @@ main <- function(routine_name, precision, test_names, test_values, if (precision == 64) { display_name <- gsub("^X","D",display_name); } if (precision == 3232) { display_name <- gsub("^X","C",display_name); } if (precision == 6464) { display_name <- gsub("^X","Z",display_name); } - executable <- paste("./client_", routine_name, sep="") + executable <- paste("./clblast_client_", routine_name, sep="") # Configures the outputfile pdf(paste(display_name, ".pdf", sep=""), height=8, width=13) From 44fb40e5c464a532d683352b73123c77d1a5a9f7 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 30 Jan 2016 11:54:29 +0100 Subject: [PATCH 15/50] Prepared for MSVC support --- CMakeLists.txt | 22 ++++++++++------- README.md | 3 +++ test/correctness/tester.cc | 50 +++++++++++++++++++------------------- test/performance/client.cc | 10 ++++---- 4 files changed, 46 insertions(+), 39 deletions(-) diff --git a/CMakeLists.txt 
b/CMakeLists.txt index 40119c4e..bc4a9ddd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,16 +55,20 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") endif() # C++ compiler settings -set(FLAGS "-O3 -std=c++11") -if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - set(FLAGS "${FLAGS} -Wall -Wno-comment -Wno-return-type -Wno-switch -Wno-missing-noreturn") - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.0) - set(FLAGS "${FLAGS} -Wno-attributes -Wno-unused-variable") +if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") + set(FLAGS "/Ox") +else () + set(FLAGS "-O3 -std=c++11") + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + set(FLAGS "${FLAGS} -Wall -Wno-comment -Wno-return-type -Wno-switch -Wno-missing-noreturn") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.0) + set(FLAGS "${FLAGS} -Wno-attributes -Wno-unused-variable") + endif() + elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + set(FLAGS "${FLAGS} -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-padded") + set(FLAGS "${FLAGS} -Wno-missing-prototypes -Wno-float-equal -Wno-switch-enum -Wno-switch") + set(FLAGS "${FLAGS} -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-noreturn") endif() -elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - set(FLAGS "${FLAGS} -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-padded") - set(FLAGS "${FLAGS} -Wno-missing-prototypes -Wno-float-equal -Wno-switch-enum -Wno-switch") - set(FLAGS "${FLAGS} -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-noreturn") endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}") diff --git a/README.md b/README.md index 8c7870a2..491ce489 100644 --- a/README.md +++ b/README.md @@ -41,10 +41,13 @@ The pre-requisites for compilation of CLBlast are: - Clang 3.3 or newer - AppleClang 5.0 or newer - ICC 14.0 or newer + - MSVC (Visual Studio) 2015 or newer * An OpenCL 1.1 or newer library, for example: - Apple OpenCL - NVIDIA CUDA SDK - AMD APP SDK + - Intel OpenCL + - Beignet An 
example of an out-of-source build (starting from the root of the CLBlast folder): diff --git a/test/correctness/tester.cc b/test/correctness/tester.cc index 350865f0..d9836500 100644 --- a/test/correctness/tester.cc +++ b/test/correctness/tester.cc @@ -80,11 +80,11 @@ template Tester::~Tester() { if (PrecisionSupported(device_)) { fprintf(stdout, "* Completed all test-cases for this routine. Results:\n"); - fprintf(stdout, " %lu test(s) passed\n", tests_passed_); + fprintf(stdout, " %zu test(s) passed\n", tests_passed_); if (tests_skipped_ > 0) { fprintf(stdout, "%s", kPrintWarning.c_str()); } - fprintf(stdout, " %lu test(s) skipped%s\n", tests_skipped_, kPrintEnd.c_str()); + fprintf(stdout, " %zu test(s) skipped%s\n", tests_skipped_, kPrintEnd.c_str()); if (tests_failed_ > 0) { fprintf(stdout, "%s", kPrintError.c_str()); } - fprintf(stdout, " %lu test(s) failed%s\n", tests_failed_, kPrintEnd.c_str()); + fprintf(stdout, " %zu test(s) failed%s\n", tests_failed_, kPrintEnd.c_str()); } fprintf(stdout, "\n"); clblasTeardown(); @@ -129,29 +129,29 @@ void Tester::TestEnd() { fprintf(stdout, " Status code %d (expected %d): ", entry.status_found, entry.status_expect); } for (auto &o: options_) { - if (o == kArgM) { fprintf(stdout, "%s=%lu ", kArgM, entry.args.m); } - if (o == kArgN) { fprintf(stdout, "%s=%lu ", kArgN, entry.args.n); } - if (o == kArgK) { fprintf(stdout, "%s=%lu ", kArgK, entry.args.k); } - if (o == kArgKU) { fprintf(stdout, "%s=%lu ", kArgKU, entry.args.ku); } - if (o == kArgKL) { fprintf(stdout, "%s=%lu ", kArgKL, entry.args.kl); } + if (o == kArgM) { fprintf(stdout, "%s=%zu ", kArgM, entry.args.m); } + if (o == kArgN) { fprintf(stdout, "%s=%zu ", kArgN, entry.args.n); } + if (o == kArgK) { fprintf(stdout, "%s=%zu ", kArgK, entry.args.k); } + if (o == kArgKU) { fprintf(stdout, "%s=%zu ", kArgKU, entry.args.ku); } + if (o == kArgKL) { fprintf(stdout, "%s=%zu ", kArgKL, entry.args.kl); } if (o == kArgLayout) { fprintf(stdout, "%s=%d ", kArgLayout, 
entry.args.layout);} if (o == kArgATransp) { fprintf(stdout, "%s=%d ", kArgATransp, entry.args.a_transpose);} if (o == kArgBTransp) { fprintf(stdout, "%s=%d ", kArgBTransp, entry.args.b_transpose);} if (o == kArgSide) { fprintf(stdout, "%s=%d ", kArgSide, entry.args.side);} if (o == kArgTriangle) { fprintf(stdout, "%s=%d ", kArgTriangle, entry.args.triangle);} if (o == kArgDiagonal) { fprintf(stdout, "%s=%d ", kArgDiagonal, entry.args.diagonal);} - if (o == kArgXInc) { fprintf(stdout, "%s=%lu ", kArgXInc, entry.args.x_inc);} - if (o == kArgYInc) { fprintf(stdout, "%s=%lu ", kArgYInc, entry.args.y_inc);} - if (o == kArgXOffset) { fprintf(stdout, "%s=%lu ", kArgXOffset, entry.args.x_offset);} - if (o == kArgYOffset) { fprintf(stdout, "%s=%lu ", kArgYOffset, entry.args.y_offset);} - if (o == kArgALeadDim) { fprintf(stdout, "%s=%lu ", kArgALeadDim, entry.args.a_ld);} - if (o == kArgBLeadDim) { fprintf(stdout, "%s=%lu ", kArgBLeadDim, entry.args.b_ld);} - if (o == kArgCLeadDim) { fprintf(stdout, "%s=%lu ", kArgCLeadDim, entry.args.c_ld);} - if (o == kArgAOffset) { fprintf(stdout, "%s=%lu ", kArgAOffset, entry.args.a_offset);} - if (o == kArgBOffset) { fprintf(stdout, "%s=%lu ", kArgBOffset, entry.args.b_offset);} - if (o == kArgCOffset) { fprintf(stdout, "%s=%lu ", kArgCOffset, entry.args.c_offset);} - if (o == kArgAPOffset) { fprintf(stdout, "%s=%lu ", kArgAPOffset, entry.args.ap_offset);} - if (o == kArgDotOffset){ fprintf(stdout, "%s=%lu ", kArgDotOffset, entry.args.dot_offset);} + if (o == kArgXInc) { fprintf(stdout, "%s=%zu ", kArgXInc, entry.args.x_inc);} + if (o == kArgYInc) { fprintf(stdout, "%s=%zu ", kArgYInc, entry.args.y_inc);} + if (o == kArgXOffset) { fprintf(stdout, "%s=%zu ", kArgXOffset, entry.args.x_offset);} + if (o == kArgYOffset) { fprintf(stdout, "%s=%zu ", kArgYOffset, entry.args.y_offset);} + if (o == kArgALeadDim) { fprintf(stdout, "%s=%zu ", kArgALeadDim, entry.args.a_ld);} + if (o == kArgBLeadDim) { fprintf(stdout, "%s=%zu ", kArgBLeadDim, 
entry.args.b_ld);} + if (o == kArgCLeadDim) { fprintf(stdout, "%s=%zu ", kArgCLeadDim, entry.args.c_ld);} + if (o == kArgAOffset) { fprintf(stdout, "%s=%zu ", kArgAOffset, entry.args.a_offset);} + if (o == kArgBOffset) { fprintf(stdout, "%s=%zu ", kArgBOffset, entry.args.b_offset);} + if (o == kArgCOffset) { fprintf(stdout, "%s=%zu ", kArgCOffset, entry.args.c_offset);} + if (o == kArgAPOffset) { fprintf(stdout, "%s=%zu ", kArgAPOffset, entry.args.ap_offset);} + if (o == kArgDotOffset){ fprintf(stdout, "%s=%zu ", kArgDotOffset, entry.args.dot_offset);} } fprintf(stdout, "\n"); } @@ -159,18 +159,18 @@ void Tester::TestEnd() { // Prints a test summary auto pass_rate = 100*num_passed_ / static_cast(num_passed_ + num_skipped_ + num_failed_); fprintf(stdout, " Pass rate %s%5.1lf%%%s:", kPrintMessage.c_str(), pass_rate, kPrintEnd.c_str()); - fprintf(stdout, " %lu passed /", num_passed_); + fprintf(stdout, " %zu passed /", num_passed_); if (num_skipped_ != 0) { - fprintf(stdout, " %s%lu skipped%s /", kPrintWarning.c_str(), num_skipped_, kPrintEnd.c_str()); + fprintf(stdout, " %s%zu skipped%s /", kPrintWarning.c_str(), num_skipped_, kPrintEnd.c_str()); } else { - fprintf(stdout, " %lu skipped /", num_skipped_); + fprintf(stdout, " %zu skipped /", num_skipped_); } if (num_failed_ != 0) { - fprintf(stdout, " %s%lu failed%s\n", kPrintError.c_str(), num_failed_, kPrintEnd.c_str()); + fprintf(stdout, " %s%zu failed%s\n", kPrintError.c_str(), num_failed_, kPrintEnd.c_str()); } else { - fprintf(stdout, " %lu failed\n", num_failed_); + fprintf(stdout, " %zu failed\n", num_failed_); } } diff --git a/test/performance/client.cc b/test/performance/client.cc index fb248854..a3b592cb 100644 --- a/test/performance/client.cc +++ b/test/performance/client.cc @@ -48,11 +48,11 @@ Arguments Client::ParseArguments(int argc, char *argv[], const GetMetric for (auto &o: options_) { // Data-sizes - if (o == kArgM) { args.m = GetArgument(argc, argv, help, kArgM, 512UL); } - if (o == kArgN) { args.n 
= GetArgument(argc, argv, help, kArgN, 512UL); } - if (o == kArgK) { args.k = GetArgument(argc, argv, help, kArgK, 512UL); } - if (o == kArgKU) { args.ku = GetArgument(argc, argv, help, kArgKU, 128UL); } - if (o == kArgKL) { args.kl = GetArgument(argc, argv, help, kArgKL, 128UL); } + if (o == kArgM) { args.m = GetArgument(argc, argv, help, kArgM, size_t{512}); } + if (o == kArgN) { args.n = GetArgument(argc, argv, help, kArgN, size_t{512}); } + if (o == kArgK) { args.k = GetArgument(argc, argv, help, kArgK, size_t{512}); } + if (o == kArgKU) { args.ku = GetArgument(argc, argv, help, kArgKU, size_t{128}); } + if (o == kArgKL) { args.kl = GetArgument(argc, argv, help, kArgKL, size_t{128}); } // Data-layouts if (o == kArgLayout) { args.layout = GetArgument(argc, argv, help, kArgLayout, Layout::kRowMajor); } From 9622d3be22d062765a39e664725e9aad42f62014 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Sat, 30 Jan 2016 14:57:49 +0100 Subject: [PATCH 16/50] Fixes for compilation under Visual Studio --- CMakeLists.txt | 1 + cmake/Modules/FindclBLAS.cmake | 2 +- src/utilities.cc | 9 +++++++-- test/correctness/testblas.cc | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bc4a9ddd..74b1e9b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,7 @@ endif() # C++ compiler settings if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") set(FLAGS "/Ox") + set(FLAGS "${FLAGS} /wd4715") else () set(FLAGS "-O3 -std=c++11") if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") diff --git a/cmake/Modules/FindclBLAS.cmake b/cmake/Modules/FindclBLAS.cmake index be6f8af4..6a153de9 100644 --- a/cmake/Modules/FindclBLAS.cmake +++ b/cmake/Modules/FindclBLAS.cmake @@ -45,7 +45,7 @@ mark_as_advanced(CLBLAS_INCLUDE_DIRS) find_library(CLBLAS_LIBRARIES NAMES clBLAS HINTS ${CLBLAS_HINTS} - PATH_SUFFIXES lib lib64 lib/x86_64 lib/x64 lib/x86 lib/Win32 + PATH_SUFFIXES lib lib64 lib/x86_64 lib/x64 lib/x86 lib/Win32 lib/import lib64/import PATHS 
${CLBLAS_PATHS} DOC "clBLAS library" ) diff --git a/src/utilities.cc b/src/utilities.cc index 042b3116..24efb14c 100644 --- a/src/utilities.cc +++ b/src/utilities.cc @@ -103,7 +103,13 @@ std::string ToString(Precision value) { // both the real and imaginary parts. template T ConvertArgument(const char* value) { - return static_cast(std::stod(value)); + return static_cast(std::stoi(value)); +} +template <> float ConvertArgument(const char* value) { + return static_cast(std::stod(value)); +} +template <> double ConvertArgument(const char* value) { + return static_cast(std::stod(value)); } template <> float2 ConvertArgument(const char* value) { auto val = static_cast(std::stod(value)); @@ -139,7 +145,6 @@ T GetArgument(const int argc, char *argv[], std::string &help, } // Compiles the above function -template bool GetArgument(const int, char **, std::string&, const std::string&, const bool); template int GetArgument(const int, char **, std::string&, const std::string&, const int); template size_t GetArgument(const int, char **, std::string&, const std::string&, const size_t); template float GetArgument(const int, char **, std::string&, const std::string&, const float); diff --git a/test/correctness/testblas.cc b/test/correctness/testblas.cc index 85e18381..febd7504 100644 --- a/test/correctness/testblas.cc +++ b/test/correctness/testblas.cc @@ -35,7 +35,7 @@ TestBlas::TestBlas(int argc, char *argv[], const bool silent, const Routine run_routine, const Routine run_reference, const ResultGet get_result, const ResultIndex get_index, const ResultIterator get_id1, const ResultIterator get_id2): - Tester{argc, argv, silent, name, options}, + Tester(argc, argv, silent, name, options), run_routine_(run_routine), run_reference_(run_reference), get_result_(get_result), From fbf071ba6299e053f4cf4011168d80bf877f3a07 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Sat, 6 Feb 2016 10:53:44 +0100 Subject: [PATCH 17/50] Fixed a linker error in the performance client under GCC --- 
include/internal/utilities.h | 2 +- test/performance/client.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/internal/utilities.h b/include/internal/utilities.h index bd174ccb..ed17271f 100644 --- a/include/internal/utilities.h +++ b/include/internal/utilities.h @@ -125,7 +125,7 @@ struct Arguments { // Tuner-specific arguments double fraction = 1.0; // Client-specific arguments - bool compare_clblas = 1; + int compare_clblas = 1; size_t step = 1; size_t num_steps = 0; size_t num_runs = 10; diff --git a/test/performance/client.cc b/test/performance/client.cc index a3b592cb..c0c91aec 100644 --- a/test/performance/client.cc +++ b/test/performance/client.cc @@ -89,7 +89,7 @@ Arguments Client::ParseArguments(int argc, char *argv[], const GetMetric args.platform_id = GetArgument(argc, argv, help, kArgPlatform, size_t{0}); args.device_id = GetArgument(argc, argv, help, kArgDevice, size_t{0}); args.precision = GetArgument(argc, argv, help, kArgPrecision, Precision::kSingle); - args.compare_clblas = GetArgument(argc, argv, help, kArgCompareclblas, true); + args.compare_clblas = GetArgument(argc, argv, help, kArgCompareclblas, 1); args.step = GetArgument(argc, argv, help, kArgStepSize, size_t{1}); args.num_steps = GetArgument(argc, argv, help, kArgNumSteps, size_t{0}); args.num_runs = GetArgument(argc, argv, help, kArgNumRuns, size_t{10}); From 40346bb3a551f14afa5465d7708d8d31102e475e Mon Sep 17 00:00:00 2001 From: CNugteren Date: Sat, 6 Feb 2016 12:09:21 +0100 Subject: [PATCH 18/50] Reduced unrolling factor in xgemv kernel to reduce compilation times --- src/kernels/level2/xgemv.opencl | 33 ++++++++++++++++++++------------- src/tuning/xgemv.cc | 4 ++-- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/kernels/level2/xgemv.opencl b/src/kernels/level2/xgemv.opencl index 8ed0e9e4..908d7d13 100644 --- a/src/kernels/level2/xgemv.opencl +++ b/src/kernels/level2/xgemv.opencl @@ -27,6 +27,9 @@ R"( #ifndef WPT1 #define WPT1 1 // The 
amount of work-per-thread #endif +#ifndef UNROLL1 + #define UNROLL1 32 // Unroll factor (must be a divider of WGS1) +#endif // 2: For the fast version #ifndef WGS2 @@ -301,28 +304,31 @@ __kernel void Xgemv(const int m, const int n, const real alpha, const real beta, barrier(CLK_LOCAL_MEM_FENCE); // Loops over the work per thread, and checks whether in bounds - #pragma unroll for (int w=0; w Date: Sat, 6 Feb 2016 12:48:42 +0100 Subject: [PATCH 19/50] Changed the order of tuners in the alltuners target --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 74b1e9b1..5918d3eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,14 +107,14 @@ include_directories(${clblast_SOURCE_DIR}/include ${OPENCL_INCLUDE_DIRS}) # ================================================================================================== # Sets the supported routines and the used kernels. New routines and kernels should be added here. -set(KERNELS copy pad transpose padtranspose xaxpy xdot xgemv xgemm) +set(KERNELS copy pad transpose padtranspose xaxpy xdot xgemm xgemv) set(SAMPLE_PROGRAMS_CPP sgemm) set(SAMPLE_PROGRAMS_C sgemm) set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc) set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv) set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm) set(ROUTINES ${LEVEL1_ROUTINES} ${LEVEL2_ROUTINES} ${LEVEL3_ROUTINES}) -set(PRECISIONS 32 3232 64 6464) +set(PRECISIONS 32 64 3232 6464) # ================================================================================================== From b7900652b2e4b4ed887b259e29ef5e660815c6f7 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Sat, 6 Feb 2016 13:07:19 +0100 Subject: [PATCH 20/50] Reduced the maximum workgroup-size for GEMV kernels further --- src/tuning/xgemv.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tuning/xgemv.cc b/src/tuning/xgemv.cc index 
c3cf9b7f..9861fb2b 100644 --- a/src/tuning/xgemv.cc +++ b/src/tuning/xgemv.cc @@ -60,7 +60,7 @@ class TuneXgemv { // Sets the tuning parameters and their possible values static void SetParameters(cltune::Tuner &tuner, const size_t id) { - tuner.AddParameter(id, "WGS"+std::to_string(V), {64, 128, 256, 512}); + tuner.AddParameter(id, "WGS"+std::to_string(V), {64, 128, 256}); tuner.AddParameter(id, "WPT"+std::to_string(V), {1, 2, 4}); if (V==2 || V==3) { tuner.AddParameter(id, "VW"+std::to_string(V), {1, 2, 4, 8}); } } From 704a729f5cbcf5b319fe9ce6e0436554d8b5d9b0 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Sat, 6 Feb 2016 13:11:36 +0100 Subject: [PATCH 21/50] Made the database script compatible with Python 3 --- scripts/database/database.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/database/database.py b/scripts/database/database.py index 01662a4b..66169121 100644 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -200,7 +200,7 @@ def PrintData(df, outputdir): # Checks for the number of command-line arguments if len(sys.argv) != 3: - print "[ERROR] Usage: database.py " + print("[ERROR] Usage: database.py ") sys.exit() # Parses the command-line arguments @@ -212,10 +212,10 @@ glob_json = os.path.join(path_json, "*.json") # Checks whether the command-line arguments are valid; exists otherwise clblast_h = os.path.join(path_clblast, "include", "clblast.h") # Not used but just for validation if not os.path.isfile(clblast_h): - print "[ERROR] The path '"+path_clblast+"' does not point to the root of the CLBlast library" + print("[ERROR] The path '"+path_clblast+"' does not point to the root of the CLBlast library") sys.exit() if len(glob.glob(glob_json)) < 1: - print "## The path '"+path_json+"' does not contain any JSON files" + print("## The path '"+path_json+"' does not contain any JSON files") # ================================================================================================== # The 
main body of the script @@ -229,7 +229,7 @@ database = LoadDatabase(file_db) if db_exists else pd.DataFrame() for file_json in glob.glob(glob_json): # Loads the newly imported data - print "## Processing '"+file_json+"'", + sys.stdout.write("## Processing '"+file_json+"'") imported_data = ImportDataFromFile(file_json) # Adds the new data to the database @@ -237,7 +237,7 @@ for file_json in glob.glob(glob_json): database = ConcatenateData(database, imported_data) database = RemoveDuplicates(database) new_size = len(database.index) - print "with "+str(new_size-old_size)+" new items" + print("with "+str(new_size-old_size)+" new items") # Stores the new database back to disk SaveDatabase(database, file_db) @@ -251,7 +251,7 @@ bests = ConcatenateData(bests, defaults) # Outputs the data as a C++ database path_cpp_database = os.path.join(path_clblast, "include", "internal", "database") -print "## Producing a C++ database in '"+path_cpp_database+"'" +print("## Producing a C++ database in '"+path_cpp_database+"'") PrintData(bests, path_cpp_database) # ================================================================================================== From c76f1d9dbb77746012c688a8d60ed45559ad0a4a Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 7 Feb 2016 10:59:51 +0100 Subject: [PATCH 22/50] Made the tuning database an optional external download --- .gitignore | 3 ++- scripts/database/database.db | Bin 2094065 -> 0 bytes scripts/database/database.py | 24 ++++++++++++++++++++++-- 3 files changed, 24 insertions(+), 3 deletions(-) delete mode 100644 scripts/database/database.db diff --git a/.gitignore b/.gitignore index de7becef..6bc958fc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ build stash .* -*.pyc \ No newline at end of file +*.pyc +*.db \ No newline at end of file diff --git a/scripts/database/database.db b/scripts/database/database.db deleted file mode 100644 index bf793177d3a963daf9f82884ba198d9f8d6a722e..0000000000000000000000000000000000000000 
GIT binary patch literal 0 HcmV?d00001 literal 2094065 zcmeF)>4Rn2RUP;g2#jQ7WQ^x2Jqr&w01tT7V@4JdvQRQ~H(e^E0;!Qo3aMn9Zqs0Q zW4C+!Gy31-qrY|Mt9ySga+NmcM!d|3$jp@=YSmhM?R{>Xcka3`UPM;?&bNL3Yj3{& zg*U(P1E2ru*WdbqU;Fx-UwP}pKk?=}Z~oM~|91U>@BjDL-}YPA-~Q1jA9~{}Z@u&8 z7oL``Klm%x-|^&wZ@lxHU%UR!Cm%XX@Ah5)p(o$*#=C~^dS>XC-hSt;ufP4~m%s7h zAN%rGKmQw_eDm!$fBmhmfB5=CPiKGlqrY@BXdpKl;%pAAEP!@A>@O-~7tgeslXuzVgj4zw@Oxzy9?%fAhm{eEqG@fA#He zyz}*Me*T>gUw>qCefa4Y{ciR8kA33N?>_m!ul}dcTz~IJKkp%YF+kWmxKJn-iA9!l`zK?$5+n;>k-+k&+pZb^o-_t+u{^JuLd*@gG^RHb0iKjdC z=GTAyjW@siwJ*MT{U@J%*O~n4(;d71Q%}C*{O`YU{inBo`O5X5+5YA2>p%PCJHPPO zzy8wa-@5*DPrh&aw>Lij)vtW*oA12!#y8*o(mUU{{_{`%*nic({?;3xKX2gmUwHC| zckz6Q*MITJcklFHzxDPPzWVj+zw}Gr`RLK3-*~#|Z+-dt@ssb~{_Txld-E$_`tomH z|K%s&_q8|AukO=VUL&{P9$O z@zbBZ{_9Wv=@{!-?`Q+2ZfAspJ@6&$#`um^h{N&SFf9o07 zr=E8H*!ACjpZ4R|f9ILbPd)AYyHEba?_2ZJKl&4&{mhTP@f5Cq;K`qSf8nzq|Jk3s z{(Il5<@_~XJ@chM9kT7;4}L58m#=^5ee%;8zjXb>-%9@F>%afZi}2~s{N#^+{O3RZ zbDz5Y2T#8H_g&`C|LFBUd{+3e>wom*dw##;v!D6o^^c_Vsq26I%!_&6gr}Q${ZHPf zp10%RhhSlAfNCfAP%A@C(2AL)RY*<%h3Np?vcC zM??A4^^ZNH{Nhjl{PmAN^OZeSe(?GyQu?9mpG@h8uYc;r=K-_`m;ZH{WB^3 z(Dlbt`r+$;`Q$@idzvg?`o@>O`u6oFe(8fxZ@G8A^p&@+fA+0+e)Y)*-+p?7U4QaZ z-~5lCx&G7_zjc7U^AG?2o?<`uTi2id=oi1|iy!&odtvmio`%sU|KIQV-7h{3pf3ra z&%7Hz|LoZSdh|4aKKmoT^>qFH&j)t=wjIBH$MYqg|9r;=kIpG^{_}@6c=TO6erU%J z@A!vz{3APl_l|#b$M4zkBRl@F9lv+SKfdGl?f55l{F6KWsU83Hj(=vyKfB|f+wsru z_!oBki#z_M9Ut%bmv{UtJO0%j|Jsg!eaFACAJN}~`e`LphyyHLF@t^Ma&vyLhJN}CuKepp(#~4{KXxAX~$pQ@xR&ezuob_+woU+{O@;sW5>_!_~wqky5r|}{KAfJ?f7du{`!tz z-0@30{tr9;#*Sa!@hdyNz2jGR{MwG!JAQq~Z|wNaj^EtzU+?&DcKo+H{^pMVhojUi9SEJ zeK`93xbO3G*X!T)di{QD+wr5{Z}s)rcHl4X8*$fp*8RWV{lD+eBl`OH^*`^x>+*i` zbvy5`+t=IAo3~fIpEqy&@VWg4cyq^x`g!wyd9O_l_deZT@!qG~K74M!0eYWq8!`R- zcHgFZ%ZdA5|0DYaysSUJ?*45%aNEy2x_{lj^A6nC@1MNr^Y^0X_xFRg9q#W3ZTm3& zeo)Whwgb0)PWAWHw*9;9@2S1$^Y^0Xzw9~tqW9-T&+qOp3wKd+y37+|6cX?zk6&o-}jBUX}&+S{d;KF^S%28J(R!yf9d{Q z+W%eqfDi51&rP?D=;x-}J{9L(8>(AF`d0pWF}1Pwn`; zz7KiT-v@qq->Ky8`1y%(6$f#?+whz;Pf9Q9%@}pPv=M^`7-_rL9 
z+krUx-#^=r&kK8B+t+W~;r@Q=wh#UN)NLR7_vN;IcxZoLZo8`2=kL$=96ro**x&!z zUYGv<&$bWG>-T^9er(&%z8~B6A>WT}YM+1o{ip3D-1YB2^>g2C-|y?cXZ<^S0QB>u zeIs`Gz>eRxW1q*{M)c=k+dlN?VB0?Y+J1lh`i@`R@k=|tsQbUs z-&?%&^}DpczbCZq;P>wv`L}kwtKSpq>$mMdzwg-gq2G6G`|$bw#`waHZ|&IU?6wj8 zdGEFlM}OYi|KHWNU$6TA?`n^Uz8~H<;$?e3-21fm>3Ih}xS!we*zr4e{6jnT`<87Z z?(6%O-lyCC_j$VQL+{gVAO6M$kN)P4_x<@&@6+vt=zY5FL+{gVA9|l|`_R9)wC%$e zH+b}-e_r#>uYPg+Kehk-TfcJs*^fT+zhQs!^nb(tiSK{D=ulXOb|HAk` zV*fAS?SFIr__SSr@qYHZvKy#ou z&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#$&du}ef&v(6N_x+*eVXtRR=^m)DQfA`^j_TjGczUytTvwgU)-0!;goBQ|mym@HrHuvu* z_q*@i_wx?)9P}LY9P~bD4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%T zf#yJS;5Iqn_Z{vF@B8;1?)&HN_qFfV?f!4MVL$E~?|c96`s>a0?)&RMownEizA*RW zu6g(W^}hf6zHxi~yAR!m^A7YJ^c?gY^gd_~GzXdk&4K1XbD%lU9B2+S2bu%Tf#yJS zpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>Xn09JuS>AIY?}O$*bD%lU9B2+S2bu%Tf#yJS zpgGVScqln=SD%Yd>-6*TEf1&tdC_|Jzx#jQfzxT*hr7myl7qdD?e*_IbRW(;&~wmp z&~tF}``|^tUt7)Z`?>ADUG3A0s(1gp|K}Yzowj|rYwUA-+xPCn{p`bw%GINDo4?!Z zessT{x3Bx({Xg%(>9p;`U1M{9+xPB6_u;$)JqJApJqNuHngh*&=0J0xInW$v4m1av z1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9C#UW;P*bZy-tr#8-8R*zt7Nm_vhUm*w01J z$95iie{B2EeAxD(`MK>w^Wo*nhrUnUUa#&$_u;$)JqJApJqNuHngh*&=0J0xInW$v z4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7?kflS z`?y>F^!IVMedzDwZu`*R*WdQxMd!nPZvKy%=s*(uo-hrNj zo`ar)-UqkMfxh3}@}uu}w|(gQ-EANGes|l47o88c&8_xruTS^kvVC~b=ezT_*Qd|d zZ6Er4-S(l+*UNINx!?Ue??BH%&q2>Y?}O$*bD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7 zngh*&=0J0xInW$v4m1av19zVTeh*|qzaKH7-y4|F&(kNI)}>GD(x-Ll)4Fu)OlX}6 ztuvu@CY;u#PwUdBb?MW(bn8rLoe8Zop>-yl)}>GD(x-Ll)4Fu)OlX}6tuvu@CY;u# zPwUdBb?MW(bn8rLoe8Zop>-yl)}>GD(x-Ll)4Fu)OlX}6tuvu@CY;u#PwUdBb?MW( zbn8rLoe8Zop>-yl)}>GD(x-Ll)4Fu)OlX}6tuvu@CY;u#PwUdBb?MW(^uF)z`|iH) zzWevxz3+P8^}g$U*BodLGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*& z=0J0xInW$v4m1av1I>ZvKy#oukU8M@1}F4;gA@9_!3q7|;Dpn<^l4rCv@U&Gmu{U2 ztuvu@CbZ6k)4KF&UHY^xeOi}poe8Zop>-y-&V-y-&V-pq<$a&h_bGj!(i~_GGzXdk&4K1XbD%lU9B2+S z2bu%Tf#yJS;PuUc>2pf@^f@Jc`kazJeNIWY&V<&P&^i-ZXToV+`m`>6T9-bpOSjI1 
z)|t>c6Iy4&X6T9-bpOSjI1)|t>c6Iy4& zX(Zxn>DHOhIulxFLhDR8txKQQrBCb9r*-Mpnb0~D zT4zG*OgODepVp;M>(Zxn>DHOhIulxFLhDS}=V_m(_xU`X&OLoP_w?!9)2DM!x6Xvt znb0~DT4%y(UHY^xeOi}3txLDggw~nRIulxF!f9Rlv@U&Gmp-jax6Xvtnb0~DT4%y( zUHY^xeOi}3txLDggw~nRIulxF!f9Rlv@U&Gmp-ja@AI$Ezx#at_5Ahx_5Aho%;rFI zpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S z2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4%|-;T%ERc;c4Kh^TVu9JoW6)`o!sp zId8?Yf8ypqK0Gn&IJ~W2KQsr2bVy&}iJJrY@Wia+@V0*a&>S4nA$^6DpMDglb@XRF z^%c*4Jp1TS9nvS9d~~RP#nYF5b*SF=G57O6ru*J~Kkq=#LC-UzE zGzVVA9Ps_7?>`|uQ5~w|D@^~y>51k~x;jiBuAZ3n@lIzFMkME=zAsZNLdE96hizCM5Y{CO3hKRwqy*FD$${;E099B2+S z2c{hG{@DBCVOOIgrmWe8POr;gg@9zT}gSo|yf3 zV)jj3-&CLUoA#wYxph*<6Zv7*@u^Nvocj1ybw0=s>2T83As?i}NmoCL=D_Tu!>PW~ zr+$6veO~l=@erODJ%>GqJ%|1Nr8&?XXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy%<_ z%K@LW``mqnD}UB;nDbM|6Q{oPCD)fat`6yt4xg899?ZEDPhCIE`o!r_KTI8m>7zsS z36J{pul(wXQ$L?Nq(k~`G3O*Vm!7EpvgJyzx9^Age(1gTL+)cZ-RJ5PuKLvV4id`Hn7n0=|^aPm*}^yB=HJ|SNsKOMg*W2?n>u_{434} z)1P|QahU#zuk@*&>+r2`>Z2#-yrXq}=|9T#!>Lal@+GGy^1-aHxIW0AoIc^?r%yP| zoAhZ8ebsl{diJf>s`J70C+CCeE99S0zxs;vCF+B#Iv-5`QNHS5 z)pMQn<1qab=TGFHbalvw-xT%3?58KD59h;giuz&pPdcAEq^~gjIJ|BD)VG?e4$Z;g zO3yl;nDf*pJ?nf>9n#@ZJ$*P_%~Ma*53`P|L;4ESkHhSzPsp!6ar#j-59-5V>JvYz zn|lPPYD z96o414(W+mPtHH->J#cqO#iB${)wBTo}9kIsc+KFQ-`VJaPsx%AN~19fBx~_&p&!U z^nU35(EFh|&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#$&NbKucwJ3sL8&J(ka!x!bx zxzC%s{-Zdpf3!b+=F#!Qo7U66;<;Y(sh@sRG+*DukLur3RL zC#OU8#H{1+Mfr2?^X9JqC{F7i?N6V1bUg8<_4KcJu9tl3r{5IK*EjK_x;aoEq(l0I zd@y~Jp1$Pz68Y%K`+jtLKhb^YKAd-;=b-1H=b-mNbD%lU9B2+S2bu%Tfy;9szyI`o zD9rlA=_`DE+RiN>4(W;Na8>6^)Hmr_Pp*%inDrIUeq7&#H}&yDb8$$AspE;0pMDh0 z<;N3Ob^i3>6R!HyUlh8ooZshNpLc!UUH-i5eboD?_fhYo=0J0xIq>r2fcL}s{un=s z=0JTooOE@VzKN%v{pu5@f5mgo%AfV@pZN3YGbhoysZX3A^5Kc<6Q&UrgPHX&bP`qfj%v!0x9LjD!5`qig7>Tu;# zPyfUxeVUW{P5b#*IIW{E>o`2>GP@2r`PfM z)N|i+-*eyZqnZQFf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1ZY&4KCj`bp2v@o|25 zQ~joW<|mp1)$s|_mweTiejH{$9UiUoLw(7Q>iUji&efMX4*B3wee#);$Uh-})+c@P 
z@x%1t>WS(w>&Y*@&)eVk3w^(EfA1IE&w0Ne<$UnbX*WPz&KKUp8XkYqp_&9Qi4%HzYratjWpXSgfG%qpzS;zTsIO*z%lb@cbpN=O^ z^_8CW+*b>iUji&c~rSM>*e3(K=8+4(Su74~P2j3Dbu|eR!gJ;!$7rCC@oH z9}Xuy>*-5A`6u7xrzhqd9P;CE(x*B<~`m!Hi;p5>sKIu7+531uZbsVNIb#UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJS;PyF?pFim3=J|UmEkRONqiTscs=TDycRG)n5KMnh%A7&p8&6)h_{OMDN>bO3bbv%(jF@5;t zJMH@f(}$bGheJA~PyOjjzdp#1^CzmOpAOX_{ibOB?4z&d^TC{xoX=dyheQ5Ee#npW zCr^ELr9Zd( z?e4>Q2YNsB9P}LYK4=a!2bu%Tf#yJSpgC}P4jlb{mCyI5_=LW%g{!{FmwkNJ#V4Hl zQ#UV>4`v-#e|*~R%Zc;zL3;Ml;mUuso<95(*NBIT^yxZ=p?nC$CyaPQ4JqJApy$_lL&4K2?tDgh;et3G{Jn8yZ zb?>*+hpQh&>q33z(eVlOLB6B?eEJj3gX&Ws9Zr2&=Sy7qPNyyB^zp;g`PCEo6DMEl zr~P`MIed6x)^T%ieK@2;Iy~*yg~Lf#?{joJU)_i9!+8gK4tfrH4j$fpuzK#PPtQAb z&pjOKn~+aG>;$N45*ou4m}4^H(-*9RZRbtdQQ^K(0o-G}bOc?Wt9dJcLHdLJ|g zngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzE9L)irv-{lM=kP0Cogd~LdLrM1(|kHK z7oX5NS=XPKe)a5M@$5@(9jKn%I;r!kLpr2GbMVBh<2et9eCE;dM1Gj{?8noeoXlhgGj=U0bxn0+X+1jM)KBMw`m>HhzMGr3QMed5;BhfjV!b^g?+I)5U6BHuJWbsXkA zdg9~A!xh)h2d#%Eo`!Y!)S-EhFHv2eIvt1fME!JrI<8KKspBwxIWM_BnEiBp>QFtA zA7&lTIV-=qzC?BNAsx~o9nw#c5BTKM2l@1+j>E~9I)BdRqp#2$$d8)`Q=j~N>iUzX zFF8LnCw-}BJ^9L?>+!>^`G2UEw7&OJIO=T7taAYby-UB^o2Tg@{k{n@9^ub!AbemK>0&QZU4Q@=Ut$?4`m zK0JNN`BPu@^Xun_biPD&e#obeLq2$0J?ER3y7}}(b!ZL_C*3?g9O}dQ=rDC0@|lyU z&VLkhPWGvr5BaiAhx~l>M0I}k73WKw=J916SD(=J@I(Hr({Jn3Z|=m+PvocP96H}k z>-tt`9k}wV^Czl9_0#^G#vz|N%sLgsUi(+~9}HwUWY`e4@6H}UMl z&4qMGzbQVB_hH$u4s$)aK6N@&PrmX`_5K{@(Yt@T58a3J4)h%K9P}LYK4=a!2bu%T zf#yJSpgGVSXbv<7ngh*&=0J0xIdJ8E`^aCJE8>hP$K|Fn1=_@McT=D^8M zhy3QzAswcU!_#6P`4aV|K5_oMuE~GYm;L&ny7h6$x5D(}iKoT+IJ(Z!xqRm0@D#6a z`X)}-56y*qIHaFqeRar(Pk7X)|Fn1=_@McT=D^8Mhy3QzAswcU!&B^I-;X|e_fPks z`*7ZYo`ar)o`c>8&4K1XbD%lU9B2+S2QJS6-=D(#eiT>F?^9QGeSC@L9o6}At~pR0 zhjf_wt((Y4pRR*%!l^&~=BJLEGx4c^>YH@)C(L!sf%+z{ zUme#sA;0>nZ}Rtfx80ZBhwj692YL>A4tfrHA2bJ=1I>X~KL@-ohTcynsvpJdUvYE! 
zPN(huOHSuMisq!Bo^$C(ahjVtZthX;`lh}~&-r}twC^+D3iTyspE?~+JX$}+b>^Jp z=EAHe=Yv_Fc>0s~IlA4)-G}bOc?Wt9dJcLH9^QSB&n?e8dZId1$8U?%oYZl6I&JqA zZhaim`5}GsWj%d2<>tZDe*G}#@vTsQ`pkn_*QcI5{S()(jze=H-^BItC8sCyO_=MY zPn~W)9fy37&JWdbKA83N_4&Eo=iP_y!+8gK4tfrH4tgIn2bu%Tf#yJSpgGVSXbv<7 zngh*&=0J0xInW$v4jjz^pTqmyeL}toeU1+&-=v>T+kLB^bMW*{etM!gP#sTHPfXt_ zu0Q=**H6cxemqefnuGJ>aMINiPy5$(w2yCv*5R8tJ@FLRg{RN_M7~L%>ekW6N5`{H z=Yw>9NQX!7=lZrB={|HH&O7i>&VlekIVp2(kbaDE)pv!8xb%sIKv#8bB}J<&Sp!{O9N=Yy#y zKMnb!A7z|N6G5e=FeKlX5Kj*7MKA8H^etrDVT*!w*I@FKz z(ebR~aPq59xSFpHbB=XW=Z8~0^_$M)w@#w@^vR#TiKo7rYdw8XU-InM$OWmBr$(K3~a}KV4iszg9q^l>I2i0?q`h@z_C%)3H%a^Eb9mtoQ zo;dkV`@Zty{4n*&pT6wZH|grr9CiITKMwhh;?X`nIIW-c={oq-ahUVdca-xTMe9R- zIHV_@hWj>s=D_Sr&Sy??ewcL}nlmAvd3Qg^IZ3zC z)Q>~{qo~h3eW~MFzuK=~KVM?@t?D_~d~?)sXl`=)DfS!ZPt*tbSD15feYm-h4zFT= zkM>XV%*}eP!w31TkE`p)A)h{&dh+Z~ZcggS`OLrC_jBdb2h(pJ9nztG9P%GUedg&) z9nbnzoFl%(>|51yZeE|dxzOC?baRsPC-Oo4E6h2#KHOYLhgY$`Q-9wFK6>|0_o4f6 z-hrNjo`ar)-UrQr=0J0xInW$v4m1ZY&jH_;`hL~-r}X^(6i?)b>NuoZ4_a3p>Vtgx zpgN?()F)1dQ-A8|&pHnI;pw#9&#U@TpSkA2t9>1FAYC6_A01bxC-T8nJ$)0WU&Vg8 z9>@pj@N?16m&gy*^(RhsI@Fh(p4jKzc7Jpqx)0|a=sD;)=sDUzEGzVV&9GKoG zpHAEP_kQ_m=jrED$00xDGxsR!=TB}usGjSkj+>jPZmxQAIzObt)F*DfdUErw;&n;C zI=?!ec#79u-5mYNvoATndFrW~&zE(5iTwK2adYTUU!p!foG-cg>iYT|-EyS+(0w@X zK+i$XLC?X%yASet=J}O+KHt=#`h@1H>x1;f>{HjL4(W;d`0&Z64zn-0IrQXLr|teX zPaTJJn0@KPPcffv4$cSZE6h17Za(Cn(E9v1)Q4Z~UoStd4(WV!Xf7YFt`Dl?e2@<5 z6PgG4aMuAx)0|a=sD;)=sDUzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJS zpgC|f2d+-r{gR)*C--?gR6p9!4^R8L&|I7kPV3P1r$2RdI$Y`MP~U0)I&k%?`1<%@ z`t_;f=J2aSb$-5y>*Ld(diJZsteZ!N=Hrk*QC%OT!|a=Q`f+&LUnkrg{HCZs=jI%B zewaS`(fhf+NALdWK6D?>JMd7>!PRMdJy!R<`#n)EO#O5|>zSiY$046OZXTR;^~Ch! zSNndz>`Q*y{rc6D)6>s4A;10=^23~awO_9}e7L$koG*2KP(A&0K6M=OUBz|MA%Eg& z=sT*Lmwx(mT~nP7^ZM~qT=#07oTuMBb*P@0esz7257HA)``2-`BR@U;$@%ob$;WR! 
zTpyhJQ^%*itWUmEeEpM;4zGrDpl)tn$HY_D53_C^dg7{XE+3@BtJt5_I_B`UT`PK16b#v7xKCPqAyyWy1=DPe>@%8bkC#u8K(66q4@~L0# zbNJ}yKsq0Ng)6^0oaR}7LVfC2`_~1t&SzbmFHs#{#r{uo_@TMDKKcsvO}?Xb>%yb+ zR`bnCpLO8XzJGjBKOa=b`OKk1^%YKYvyU(JZvE^OH7~ZNIm$@7RPhH={ z=`j6tIMwNp|0=HU6kMGjrVmg5v>rb+mw%Jv}h+zF5R`C!g-ope4(Pai#fc=~a5{i)-SPanLB^Twx-531w( zlk-9I;N(}=_fS4}?sYtR_fPks`*7ZYo`ar)o`c>8&4K1XbD%lU9B2+S2bu%Tf#yJS zpgGVSXb!xLIq>+jy>8Iw+dh}3PkhqNf$5{0pO|(1^a=UQSvmp)uwUvfUkheLgk51)`Pk$=*ytIkiy)lDf2&NjC?kkN!Av0?)dB`hhwj692YL>A4tfrH zA2bJ=1I>ZvKy#ou&>Xlt2lD$?-+%i4G&z6P>HK*5vX1j*9}f8vbI!!+=B;?Hqi==g zCYqNzo^$Ci>&eYeoP6d^{;cEX;V^Y`)gd2DU7vdL^sl&n$Uh+;e0D&?(*kd@1x#Fy^neyH3yml&4HIE2lD+g?)|a%&vZQN6W0gzuaMtds84<3 z^z6H-Zk-jH%ZEccJe{_jz~SUiJ^R%sG+!U2>r0-xK02O#IOKX zVb+sRzSNVOLqDCi`!02LIv*X<6ZK7~UmZ6GSEo;$u8$vAhp8v$gX$CVore8XhkVn# zsjgq&K z$ES{~(^r^%_^Qu3{7@gxx5DhxPuGvD)8SO7L;X-ceiZd3S|@Sx<$BqNr_a2j_j7%H z{`dL+@Sgwf<3#s69Y3A6b3AdnK1kOOv(86P)Cc(?9nxXy_~ZTSg8UOt{S^D64$Xsf zc#3{p9cKTE=bYsF6Zz=YPkrK(-#o|%CtW>}Kk*c=r#?PBQ5|Oew9idUzxvdlx_*6l zqQ2B|b-0?Rk53e)B> zR=Rn7=BA!J{WxEuxiI~>dZK#HrSs!_$*<^7^a*p$X~;qS z>M;BHARo@3oSv9I^Z4*YeUq*Z%}cIdJ&`{#eL06eeYiT*kMkv}Lw$TWXWXnAD_@1^@*D|VfOKvi^KG% zPaW#xgOjcf(}yRX7SCJr`1I3pNQd<7r}JeUhm+5oM1E)vq(l8UA3ZreG5ht=`Ef{x z`c{0^pY@!pp2(MaUQgDOo0EO`*hgqeTheX`f{HB#L2H8>YKPZ>7(mQPKWv-KhBqWa=sPn<4b-z zcz*Tq<2g6$e3P!8bLjm!%%gYzbRW78=N;%d=sD;)=zY)}Xbv<7ngh*&=0J0xInW$v z4m1av1I>ZvKy%>sIpA|>=yPk_=h73V4~P1aTPN$OPxYL~m;K54pt^bZROeHNbp5z` zV%Bl}{HfzpJ#~Jlo;cM{r!8O7kL!b}PyKYLADW|%C-P6opLIG^pZF>Efj?0{Kcw@g zt`7Mo)UU2@!t~)#UvleYJ@u)c^Z2qqIUiIv51;CM>X5DLpp!z>W~li{pj|*=st8G&O6X^ z&~wmp(EFe{&>UzEGzXdk&4K2?nompx8?fa>9pkop4X8&zUniFA3ut#KJ`TNCtZEQ>^trI!>_*LE1&+v)9`wvZay@J zZ^g4Oxqf<|quYJbeds=%ccAB>=b-1{;oS$Gf1X?U9Gmp?;fea;>9pMke98F|)2AQu z zUFIb6)A0$-<%j&q=__3I_4)be-9O!j?!$QpdJcLHdJcLYGzXdk&4K1XbD%lU9B2+S z2bu%Tf#yJSpgGVSXbv3Bfz{{p>Zj9oU*XW__mI9qemME3I(7ys|O~|jF{1p4Io~S>OkB-Be&QE{N#i4a3zS5_9&OLfR z*SFx1ga zS9A0w>Z7lC`ljok=e+FCdUE~=(~n<@xn9o8dUAd`J|SNsKOMgm&707Cb^Ns7uXy6B 
zp8lMlb@L`4oe$D;Uh?#3AI_(L($y!wK0e3~=}=$dsy_L%?-V(leYie2>FVY}e#kfB zDw z#OXTHho6SmL)|>cpY_zUj`NuZ)gd1aC%-!6gQ>6hZTqIV^u$x_f7bIl)b*{n{t5N3 zP(R;_^Fe+{PgIBM_-WrSI#kCKPeUJH_T&0+{!7u^X^wiXJL%@|!8Ea7c$obv~#+@ict>*_S?bI-Z#8sne5B^Y|09KkF0M2h(@j_eCA1Z{j!g zn{(T~>^~j6e(S-UgRfA(b@^9ZU!wlhlk-i;Kh;me`RC7mJdr<$xm?}vyQ`Qjyg;q{S>b&ALN58T^;I6REO%P{dyohQ6FC-KOYWLPtJEK z=G^3`_NE`P{kJ@#x(@-G}bOc?Wt9 zdJcLHdLJ|gngh*&=0J0xInW$v4m1av1I>ZvKy#ou@G|DW=T6(}_URpQ=yU3hr=bx@8`}v^$tN8l$sY5;-@*hQg({-4mp6k;2VEX8Z={pVQ#dYe( zAw5z3D(2Fmb#Zg(cw*LZbD%yvajKiAo@mabXC2SE^qh0G?_1XOC#KIlzO3UbG$%3p zR(1aAda|Dn>c8rJ`sn;8_sjm!aY#>8zuNazoeocX9p|ItiCM?Zf%@>oscxQnqB)bE zb==&(4?Lf?em~H2&~woHpgGVSXbv<7ngh*&=0J0xIqQJJ55` zbI^0p`=B|{9B2+a+#K*e7*6k_)$@HcKRr=h|J7-`AL#t*sb?MM%f6hiPS>Ak9z8i7 zs+&(giu#~F99rjU-#0ox%sQ@r)yHQZq~|>P&toSlb^1iA6KU*W_{x3=}#Y> z?`n8m)cgF~?)UCP_u;$)JqJApJqHi(KJc9L+=6uId53!*CQd$c6U~D;k3Qk)wB1+g zcwWbf>oW&V*JB>k5A~_z=EC&jQ=d7IA2%1OLq5otsGfc4!}%foYIuG4;N(|_e5ZXb zr1Qb7^Fcny59$1H(zAZGU+3hf!|ccV{M_#I?nC$CyaPQ4JqJApy$_lL&4K1XbD%lU z9B2+S2bu%Tf#yJSpgGVSXbv<7j^=>R*?kTVPp9oZQioT&4*3(+r#a?j-%3}X=F-jO zhw82`b@lAW6ZONXo^$#1Wt~1@_T!2A&4+yI$@$ci^Iz@f0q2Kw{kS@PLi72vpPrcW z)KkZ^zT&PU`{{Yz=0Sb@P(At4`?9 z$N3?hj}BLQ)^WI+bF_Zi_w6cPZ|fxT!>n6ZJ$d@ChS!Y_vky0)4`1PFUzg83I#h>z z_=J3L@?FK(e;TgKoUG&O*1;hiUhV5-KMwiynWv6VK04nDtpoXSXf6&BH6e=@at9 ztJr5coaWF^dp{1dABTMUAswckJbhWW-c_7CIy47|IWIY%e)ZJVQ^&K9esyqvbAHat zI?jjZdg`tR=cnJ4=N$deb(qf&v(BfVkB-Ci;ruZ5?9Y1kUG1+QR8KS~=TAO<^{Y7F z>gMsKj<4!`xlZ=sP+#IH&b7IG>QEi>K|Yx4pZawPOks7`yk!Abf|t)%y~GR=1;mlm_9mmy;u9!3+a>Zp?vP# z>v;6;pYB8V;k*Mq2R#Qp2fYuP1I>ZvKy%<#&4KOie)Q<~KIY#xyeZdxSA92KhyPW5 z9s6}{C$Rg_eK_wx&q2>Y&q42l=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1X zbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>Zvz{{Nj+tpwG z_g(ybl1u0HJiXlKsjpvOzw-{P&cU`z4<~nC^!2RvqxJ0-=st8G&O6X^&~wmp(EFe{ z&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXb#Ldu$_*F^K-V|uf1P;zs~n- z?~mRey+3+?GzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v z4m1av1I>ZvKy#ou&>UzEGzXdkcbNl^PuqT9idXxbo8n!s+xu&Ky}J+Hhw~2f9P}LY 
z9P~bD4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7 zngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzT7H4&3$MH#wcQb1-4=linu}@jmIf ze7@)Mrn$5Fdvuv6H=UpU)p}Xa^^^DfJm2%v{p^1BInnQ{ngh*&=0J0xInW$v4m1av z1I>ZvKy#ou&>UzEGzXdk&4K1XbKrH&f&97FO?m#DYsK^DUddnA*P&l`pF@2P^*PiW zXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%T zf#yJSpgGVSXbv<7ngb6l2Y&Bk+kFWioi+|XvUB)&=g{w!>WBKFeyE=x@859)*Z zpg#D}_P^J&owx2o_u;$)JqJApJqNuHngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzE zGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>Z+ z4)}W_(BBh*{+Vx{AKFA08V9({YzukxK!+8f@)pOwfhwgtUFCZV}gY*gc zptUzEGzXdk&4K1X zbD%lU9B2+S2bu%Tf#yJSpgC|CIk5VZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S z2bu#fQx5ojtyc}NPTM(r)$p``T@M9cwR7X^dDYI1ujiqh8`sh2<(5<3hwj692YL>A z4tfrHA2bJ=1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2;QRSx)h$g75a9`dT8pR2rT zcvt7W>-72B=WCy@&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v z4*Wqm@T&dX)YtQ>{oK^o^Q!&a)YsFTYEJz@Io0#g^U?Fs_p{A`=0J0xIqZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJS zpgGVSXbv<7ngh*&=0J0xIq;%$z~7I#6#adaOVQu|xD+q_dh}oV_2|F!>(TGJE=AXM zDPDRV`Y*i>{ioCR+_)57*QMyXF2zf~9{rboJ^C;Gdi1~OeCzzakLvrVzK?1SGzXdk z&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngj380iX9@iazhZ6i=t^oL-8Tem(jx z{d)9Y`t|5{U6-Qkx)fd4rPzFKJ~yA=pU=H7dSCRu=zY-~Xbv<7ngh*&=0J0xInW$v z4m1av1I>ZvKy#ou&>UzEGzacJ2QK|Noc>FH4yXUppTp^QU6-Qkx)fd4rFiMrqyN&c zNB^Z?kN)O=^S}9j_xa!ZuJ>K(PJd*Q4KcU5c*jQgmIH;-z1Y{!70e{g-|{`ujek z?=$*7ZvKy#ou z&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=D^FD1HbpN?Ya{m zoi;u3BfFlM{do4{*^lc>)R(9)G5hiC$Fm>Tm#8mMUt;#-*^g&Gt}jtvqQ1oJ$Fm>L zeq3LozC?YA*^g&Gp8dGKM16_+60;xAemwheeTn)K^(AIMp8a_CZE z$^oD2C;D7J(dYV!*^g&Gp8dGKM16_+60;xAemwheeTn)K^(AIMp8a_CPys@cvt7W>-72B=WCy@&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7 zngh*&=0J0xInW$v4*WqmkU#gr^XFc8{@e@Km#8mMUt;#-*^g&Gt}jtvqQ1oJ$Fm>L zeq3LozC?YA*^g&Gp8dGKM16_+60;xAemwi}=2Ua)56Y>YkDia7kG`L64m1av1I>Y# zB?r78PV{~_(fi@V?8mbo&wgBAqP|3ZiP?{5Kc4-#zC?YA`VzAr&wf1naeayU67?l! 
zKc4+~_T%~z^(E>{%zixk@$AR-CF)Dmmze!{_T$-)>r2#^s4p@5@$AR5AJ>ZvKy#ou&>UzEGzXdk&4K1XbD%lU z9B2+S2bu%Tf#$%A&Vl^<8UFr7^(}w858a3J4)ng*&cXBEhZmi1oxiZvKy#ou&>UzE$btU+Ys-V~!~O1q^H}Wj zsn4fApPB>Bf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>XnW9O%yt zxAWJ1xZizf4mJnxGY5PAdj5L;`hL7Q&>UzEGzXqn4)ptqEl;`+-G}oIJn#QLOV8EI zb*`RQu6EA$x_2MC59b}|Ip{g)Ip}@R9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0x zInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7 zngh*&=0J0xInW$v4m1av1I>ZvKy#ouAP2U);nAc2`rl(91gb;jiPK^FC!O!6XkMZ@ zD?NRea_dcV=qt=Q>Cbxd^yBHnVfs>6PvlFysZYPYiKm|atm82K$1u=B)JeUCOOD&7rR_=cGUD$J1^e0bW)^Rxbvp)G!$30hz+fH^Lx)0|a=sD;)=sDUzEGzXdk&4K1XbKv$l zu$`Xw{^|Qq_+I7MzpeVRp4=+*3DdXYIfsA3^e5Lx#~~fkVd}~Go;My{hYz|Ae1(&L z(sN$=lJnDH>dDimK4JRtME$8JU-{G%`<&Z$vis0|IPXBuLC-d9~G%Q-ll=F$`OO~^mh>64G1n0@N0 zcBePdzywRG%iVF5{I-~LaAY>IpA~n+v0Y&&HtR{W(QyDc-B{Z)u$iwLpq%FsXqB9-5faiCS4y~ z`LaIo)N{T%%zEwjYxhtId=rHHdp?Y$9V)~vJHwRi5Ps}==sE?kUeiU!pKlRb! zrPq;jaAh)Q%}r!>h$FF36J_GA01i;pK#?-Y-wLw-Cl>(7gulW6_a@tf9<`qPI`nAej&b*PTR)RXfi z@+GEk;`*{a>67oMKl_uL1J!XzhxEi%J^i@8MD@g#Z>mpv&d2%j#H{0xf5rJG)PEGs z;h*?Q&$@Yud=v6#J#{=W`;zmgj^DIC`BP6mt;d(hzryTGp8Z+JkNWtbJ{+e0P(F9= zb!@Lt_o4f6-hrNjo`ar)-UrQr=0J0xInW$v4m1av1I>ZvKy#ou&>UzEyo@=py=w3O zyW>#4)3 zPEQ{W(?^Hu6Q&P`*+)N$tNvBpI!Aqzf7PFLJm=ww{IKr>w;k#}bRW(;&~wmp&~woH zpgGVSXbv<7ngh*&=D_7S;QLLuovQc$^JsnL%gOvueZ`OZra7tKbYAvPoSt}ez4Yar z+j4Ug&7)8JsBR9_2k9_%JduCG^yBHD^z`BB$01+h(K=sZpLg4ib|1PA=N;%d=sD;) z=zY)}Xbv<7Ui}={PWgMkUzG2Uvp(_Ea}J(99P%ZqC-S9^t0$@_PQKJ}{YUxaOWjA4tfqA z-hGhIF?>6%?|*+thy0h~P3Q4XnAgL1R8Jpn4x~@W2h+FG)f11-nSAD{e=`_O$j??BH%&q2>Y?}O$*bD%lU9B2+S z2bu%Tf#yJSpgGVSXbv<7ngh*&qdDMncDUU=@BgPdJMd7>f%|#F?bUkk_fC#-KDe5nJ{;;#PKQ%H^-FoKchhdCr*8In0FL9dqB*xk>rDMw$Klbw z$wxOYQT?W9j=tnab$yB1ufEdNVfN8s)^T{LPk*BM6Q)1;)JI?W)%hWP!lS;U{YU3b zzSQv*=JoNXKJm%F(z8DC+v@rfS6>G|AHKr$<1qW_FzXXfKMwWbke)cz=@Xi>s`J6y z`t?7Q&z*Z6+bhz2=suixpy#0Hpy#0XL35xv&>UzEGzXdk&4K1XbD%lU9B2+S2bu#f zV-EPddVBTW`)BIO-*ayNrk=<*>CdZfPGYWml$(=&dgA1x^G`f=ewg*-d{8}6Ju!XB 
z`RPYFAJh+1#~~l2C#u7&C+CCex8?faG>;CC`qOvQ{`8v%r#c<-Lpr2G`U?4Pisoh? zJ#q5U`6r$_Kg@b^KB%6ko|wMm{Pd%o59)`h8&4K1XbD%lU9B2+S2QJTn?G(NLzxy!hzJG;$Fm-%|Q$HP= zGx6u8PwPy&byui=!khN<=R7)8$07YFW?%AY4&O~N=jo^8kPhi9OOQI&O6X^&~wmp(EFe{&>UzEy!tt?o$~pg-Y3I!rjA2?9MWOx_=MRv`6gXI zBA?~ubw=0 zb-1eYCF)CD`P8!ySBI%TZ=UN+^XZA%kHhq@xV}C=x1H!dbRW(;&~wmp&~woHpgGVS zXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy%<|4s3VJ`~T_ld6ndo&IkEm>Jz8K?7yj= z{VSgH`4jn|Iu2KQ)^V6~COv)0^}(rrROd@HKXLM{^qezsb5`~A;ZXm?SNc>xI#)kD zdOz2ANk@KJzE!Pn`Ude$#$_^ODmiOn-8HspAvsgM2ug zboIo^pE^FFx$5|Yd@G##kLu=6{#AX|ryr)D4%P7$rXNpS%~7BF=zNLlFzfiL&Ii-K z;`$)}3i%;F4pYA^*9WKhsq;^me*C6rzP^bc)y;uN`&PbH-Fi30oU1?e6+haSK66*7 z4^IB9<1qVD$8U<}>RWNXo1(di=Fo9C=~?GXOy9)m(46G-L_T=5&Yw8-r;bnkD_uXl zsXzOUaz1EX98P-H`Jj5@i>jLoZ@MnOemLn{gZtD7>`a}8Lx!1A1BHf4X!+8gK4tfrH4tgIn2bu%Tf#yJSpgGVSXbv<7ngh*& z=0J0xIq)*(fX|=d_A0J_znBl|zb&rjWF1e;^`|CuSXo{P?A49-QVKt@FX0M~ACAA6)s>(~raS(P7qcn7-82Z;GoqS;rG|-KkFJ z#}n1zRHySn`hwGZh(c!Aj2UmXe^y4sn zeIK~(NcW-paNdEQgPwz)gWdev6&m(EF_eEbvMwlC+Iv(ous z`thkwPdwT;`OH&KR8Qoi-<0cv`XL?ORzKP|`K&)-_9fSMQ(Yg-{ys;yo#{SwAI>|_ zbI^0pbMWx)gY8tl_w$H+j!m4tLi1tz=`icb`QTKi^FcbCbalv=IMwNi`cqFn`RIwO z{`4o;PftGSd@y~fAFZbkhdCEdOusrkG3$7uKKjJzkbi~zu+PtJC%O;ahw~2f9P}LY z9P~bD4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU95|W-K9Aq-n)m*hIu74+ZvU2g za(;T^P3!#7yySGK{=Cp!c-y)9;n6w#aMhQ-_*3M~CV#^@-Du z;xvDytHWIH=>1&Zwu9Y=?!$Qp9?Ch$`+0kH-v7_^CD(COPhawB9$(gR$OluO_@tYY z$bS^|r+?D|gFuW;(8>tAuc#Ho*-nEeyir;fwaahSf;vyLaO`m&Bg^H!V> zrayIcsGgX0JW(GVhxEj(<0~{Lee|QKKmGI->f=kk(r;Ry=I5OB;feZIsPCvR{Wx6B zQRh$0dUF2MahSf;)uB41!_+5EhpYaTPydA3&qtq-?RHDjKTLhavyUHU9fy1{_2esG`f)h*r_Mhi ze`5O8={Tgr)F(da=D?%A$)CD;6Y@j8t|o;c%nXf z^3>H=s1K$;b$mkoSx+5@{1c}iMe`E%PkPqzME!Ierk=b-1H_d#=@InW$v4m1Z|jU4#MY5O`JDn8z?@1fw;em!@E5A}6%UHAREyy(}V zIk)9Q_o4f6-hrNjo`ar)-UrQr=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1X zbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4qTE0eh>Y=@J0Xr`=!6% zemZUEYr@`tz5gz`|9XCUetLfTKCU^?9B2+S2bu%Tf#$&NbKt(dk3G8Xi~fFhHUHA@ zhgbd6_sK`~=3n!#`FH#L>;2RFr}t0qpXNYwpgGVSXbv<7ngh*&=0J0xInW$v4%|%+ 
z9Q{1_zI?0CiC=Vm^*QpT^`oCRPrggZ)h~Vh+kZDX z+kNgn_kC^O*ER>51I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVS zXbv<7ngh*&=0J1cesW-2^}T=o-p96ohwoLM)nC{8UGK+Tujh4zulxGVrR@ZCAG#0c z9q2jeIp{g)eb5|e4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJS zpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>Zvz$H1b?cRI;^n2^@y~_4)bojd0d;e`G zp!?8$IPXBuLC-UzEGzac72e#dQ@1MR;h3{3if1|_JpWOlb0-vou zdPIkp)|(gG4t5{959b}|Ip{g)Ip}@R9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&hnWN0 zRlWC5pO3=#D%-!&Ve8NCfPI0_Rv+Hy&@cM>I{(=na4z7p)tjG>=IXYC-G}bOc?Wt9 zdJcLHdLJ|gngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S z2bu%Tf#yKwz;;)>_j@_z+YaRKFO@qPuh;(M{ZhWY{@sV}!+8gK4tfrH4tgIn2bu%T zf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU z9B2+S2bu%Tf#yJSpgC}p9N2cNe}Cb&e?Ou3Tkp4<+;2TEJuf{keV^7GXbv<7ngh*& z=0J1c`Q^ZNn)?0hZNHzr@BM2YHV>PJ&o2*qKlOg<{nY!ZInW$v4m1av1I>ZvKy#ou z&>UzEGzXdkFD3`Jsr?-Kb^RQ=um83Kuj}jITzxUQ+I{W5o_C<ZvKy#ou&>UzEGzXdk&4K1XbD%lU9Jv1+@OwcM zK00mZbi$A9^$8#E^$D-``h?SU(WmR8PuE4Cu8W@6W!(v_JE3(av@V?1p-=13r*-Jo znegg?zdrV1Li;eGeVDM%sYmbr={|HH&O6X^&~wmp(EFe{&>UzEGzXdk&4K1XbD%lU z9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk z&4EjDVEVmp`t*C>^y&A$>5os_>ocKsCbZ6k)|qgcOP}V_`6lfB*Zc31`>*Gx=cnhV z@8g;S&4K1XbD%lU9B2;QJ_n}n)9Ajhn{fI*lK%L#ozDrc_WFdbW5VgW=&obJ>AL9C zx^(MIIIT-JXToVNz4^DDukJ(l;k*Mq2R#Qp2fYuP1I>ZvKy#ou&>UzEGzXdk&4K1X zbD%l!Fmqt~{FCnU&Ix^6hMD)4Fu)OgMdA^v8Yu-@O6sN1}b6aN2kJrLTYcw|we8bRW(;&~wmp&~woHpgGVS zXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%T zf!8Sq{N9@1NAvq_e($gK?SAO~Z2QpjvF*dFc7M3fi`(n_s_aAG&up(}_o4f6-hrNj zo`ar)-UrQr=0J0xInW$v4m1av1I>YFbHL{x{oHfgmwxWK?ZaJt?)jqcN9Vh^y{@?r z-T!aBFS$=IiofXn@BHoc?LKrL&O6X^&~wmp(EFe{@N5pesP{Lo+T4Dhyq%kVpS60C=MTDndOmtS zdOrF-qdCwVXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy%=A$pN3A_vimx?%nmzzhC$L ze_iggew}?^(f1X7U(pZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%T zf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU z9B2+S2bu%Tf#yJSpgGVSXbv<7Uj7{L--&?!dmYLB_bVnn|Gy&XOWt`~e%OcCb^rPt 
z>2u^hpCfV!X8v8O_k8z!-{<-6`Rn<62ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0x zInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7 zngh*&=0J0xInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2fkGf`2VQF z{Qs%q{{Qmnqdz`v`>PK1C)YRWowx1htG<8DrR{a@K6D?>JJ55`bI^0}>fHz4=fHff zU#d4Znj6iHS1&huU-!Q5eck)IIdC^Q;C&eE`?R~+_nzZvKy#ou z&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0xInW$v4m1av z1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7ngh*&=0J0x zInW$v4m1av1I>ZvKy#ou&>UzEGzXdk&4K1XbD%lU9B2+S2bu%Tf#yJSpgGVSXbv<7 zj^%*=-=qJpBlO?b$NhKr`5_+;`4jn%&gC~Jd9Igred)74fAXo1Umco1)%hTQ;;LW$ z@o9T~%}eB?TZfLTuTWp2InW$FIu0iv9r8o-9`9d2& zlk-20ubYpqpC9t!kUx?C=v;ntlIMC^*Oxx)^CzGB_|>8LQ=Jd;C$9R{ANM|eH>g(o zV85aL!&k@;`K>#>e*BP6e{w$c|_`=RHc=b-mNbD%lU9B2+S z2bu%Tfy;B?@o76pzCZPSXmY;k`&D)Qke+?&^a<04!|bC&b==pLIu0N2`;vWlqPf|J zo2w6>>U4f+or#;vhx6l*o~RDZgLJ6>@xE_#Nav&D=BAI%pPX(E&Ijp}FYD=3hvxF( z>hN)#Bl?8=S?}|1%c<_e|IglcfLB#)-=~ofQbEvmdPD~q>pXt1g^%3(rVr`jD%kqeIex1&InvbdHW~(1bULxlM@-A>SBF_1E%OtzoYh%gr}JrEa5-mSY3YJA8k~A-e*)g zdDhSTw5uzh*VC?Pe%2Rtola*y@~nrJJo9P0y7Z`W$kWCWqPMzy%`-nO^YcDvbvpBr zXFgh{(`sI0mP5FPNX{#M&}lU&nl!XMUz@%>1#0_z^MlYs++&L(F{SY01+ve^h-@*Xiq{ z@@bx!^+uJ$d`u_SHY#27F|?=ViKDEpFA7Ipzq)#Lc{)GaN$1lzs($8UxwPbY9eHBb zOP*GzGas!kS7YX5Ike12tMieM;XP{0a-uA+XF73IJ(_2Iyq-2HA9-F!%iSt8^YMD}%uk+{`E_~}W_i4hUx41Pn*a%q{auhaRN zPRr|=kG#g3*Vi#WE%R%uuVea}@=@iJk18jo^pp8mKd)aMUR^$3&+>JC&95$<>3l4QyuOa<+A<%niz;8|(^!|wax||o%c0fj#JoS|iz=V#yiQ}5 z!}Q;ad7ZYp9^N0VPG`Ne%ty@fc^xfzosXE73rm4N`IJ-_rZMRnV;$8iFJOa(?+F}XF1GI%X*2KpFFX?o;iU?U zd`$T#uV=c>Ppq$Fep=>Zd9>u2PxEV<9(5hBXS%jJo%r`!mK#&;$LFGDxy(;YOP-e3 z(UM-!>Rx!N*aTh_z;QT1tFV_gn0>m!b` zEH6W~XH@>x@vJw>M%6=}mi4R-vmTwF*U{>9VqPCp&rd!oAJesEeYCtj%IfQ*Fw3LW z<;B$f>vUq4&-}!+Iv?wcvidq+&+>_RJ+Zbdhxs(t`H7h?%JM!~E-mv%m80|N{Jd^; zmgO*?#^j^QkE!>nuP3IBDle*hUPsI8qwt?)-P!LWBlXYQ!P_C(0`3Ry2kr;n4_pUa z2V4hS2V4hS2V4hS2V4hS2V4hS2V4hS2mXpWpx2?fj!mo6xqhv!&d2Mc@-dy)5$pV# z*ZHH;V+rxcsQmgmV&6~#JryQXmvXC 
zX{+<``lx(N=XJz7zvgxRsPx!s+Dl(Y%<^g1^nP?cUau|lk&k&l%}D-ZyJ*XHisE%T z+mk#kujhLvW_lE_^AT&ya${^0!~*#hnd z?g#D%-Va;{TnAhSTnAhS{yQCrCFFX}Q}ujQW9HX9=dUb>R_D_^%hS9rkC>MEY01;B zj^};pe5=Dz<@35VwJay5+Mk%|yia1C&ia^6tS!@Nb-KpP$NF_TdFIpU9ZF|EwxNHLo$tb)6HP@^H?$-cHMPcUrE?(=uNajwPi3>3q!3a<$d@iFH0=rpHhp z%h#6qHP7p{Wq$I%_d4e1^|6TdU_G?V&+AubnLp}!ov!mSo%IkiANiPSU#8RYI$HAD zvb-ps>8rD>N8cBDT3rt7)jadlGChXp*4Hr~E%OuW{KPDe`Du9_EqPu?yr%ux^JS#| zc{_MJBwOIm;s?5avtP5HGd;>OzfLEwZB#zZv;J5@{DqkLbh*qI#p`rk4$G%yI&dfTrn5X^+Nk_Go%uAcG3#Z1V&>D9>Duyo=GQo;+L7txiCHc&%OkI`&ZjZ+ zYo3@^=ZmR+9+gk?EQj~YbZw*3S9d+rqwX)N9@fX}bw1*#bn>j1`DvL>OdG{V-H*<% z^XdADbv})GKeSBOnDy&)=F^t-ljrpslV^Tn<|9w6(>1T}Pv>Vkd197B%yjY^Gau7e zXPIA@!|OC3LqB7>zMlD`c&77uVr_N0#=keezMl6NTg~(EI$B;wOdEASOxJdG`I)}D z>vTTeA9-zA9{Jykb@{wrTb3VF;|O_{qb>8ZJf;)#dfKRTU5?Jr>&Qpt|Go7xKP~H_ zCC_@vN7b+M@p@WbPt5$ZESLFcnV;#*Po9|hiJ4BGHj3wUw9HS->$GKlZS{2;Grum6 znE9EHR;Lrw#v=Tf=`5Ex%CbJ@C#EIO>sfCUCQr+9XjjMU@^yYuAZ-GCwVO+9+O^7lmW$z3K9Jowm%Ud6vg?ZFPP=hqlb8%hNos zBd_h6*3Wz_Pg|DDe8jZOPfQz?pXpK8F`btASYAxMH|Eop`SkV7N1m3vzK*=MydPR! 
z4)beX=VyJ)&vb2>uB|Sg>AaqJb(Z;~%Hw@TrIXi|_5NAbo&7#CQvbXiyd9D);C|qK z;C|r!z;(cNz;(cNz;(cNz;(cN;P0*jy%N&T*Ol#q`qz~`1NE;fdj;xWS9V>!ch1NQ^>1Mdf}1Fi$E1Fi$E1Fi$E1Fi$E z1Fi$Lb%5)0>&kL{Ze3Zf&#f!V^|^Ir`zEA6tSjq!^k6CYDJpa0~z1;s&9nk$R)eIT`Yd`dH zSlZp&!P_C(0`3Ry2kr;n4_pUa2V4hS2V4hS2V4hS2V4hS2V4hS2V4hS2V4hS2V4hS z2V4hS2V4hS2mTu!@cn=BAK(9%cJTdw*OUK7PkcOb|8f8ExYTvPb-;DNb-;DNb-;DN zb-;DNb-;DNb-;DNb-;DNb-;DNb-;DNb-;DNb-;DNb-;DNb-;DNb-;DNb-;DNb-;DN zb-;DNb-;DNb-;DNb-;DNb-;DNb-;DNb-;DNb>OeA1Brz6<2ALv_dCr#D1S}u@BRMq z3Y5R5Ht`R8-oLtk`u$7)^mg!eNVb6cf%}2`!Mf`QtBZeD7w4=lzFA!yv##rR{c-(q z{aJVVaBx3$x5?04F-|7gp8pe_AbTYpb$ z=RJ8ldOIdt!2RH_=?7Y0iM75GYkeiw`bwNgNW18A^>uxb@1K_ckM1}BwD$bH&-+KW zznA~l)J1=9{@#);;C|qK;C|r!;E&dU-y665)AIk(?eFEg&b!XL&btn{4!91u4!92d zwRPa179aoK=XL-7Yx}pqKYxE7r+A#=I^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6s zI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6s zI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6s zI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6+PtXDWozt~t`y|9~ z)|TbpU0qxD3f$k?vTLi|SkBsNH(BEh_V?8NsqE6NRP5Hq(|APddWxCOFpVz z@=^7YXFXAt^+Z|L6J=RXl#QyFd{n*Uqv|CeRWEtg6J=RXlx00pmi0v0sCvmq)k{9A zUh+}(l4m_pmi0tg))Qq}Pn3ydAt9k}cqV;C|qK;QhdL zz;(cNz;(cNz;(cNz;(cNz;)oCqyt=kin3gPin3gPin3gPin3All8>sFd{n*Uqv|Ek zdZH}riL$IG%Cep)8&xm)sCvmq)k{9AUh=Fb%Cep)%X*?L>xr_ix30JUB)xTicYk+( z_j!ivfa`$kfa`$kz+XcLqUJ~Bqvl8Cqvl8Cqvl8CSx=N@JyDkRL|N7oWuxjPA5|~; zsCvmq)k~iBL|N7oWm!*@Wj#?gs$TL@^^%XOmwZ&clx00pmi0tg))QrYoce2w zQ~sX(J^8%C=M}C4t^=+Et^=+Et^=+Et^=+Et^=+Et^=+Et^=+Et^=+Et^=+Et^=+E zt^=+Ee|{Z^+V>?NweL$lYTuW9)V?oy))Qq}Pn2anQI_>Y*{FKSN7YL{s$TL@^^#{j zQI_>YS=JL}Sx=OWs+W9Jz2u|nB_CBUc^@ZyobYkt&p%FhKlOg<{nY!Z>wxQk>wxQk z>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk z>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxP3 zbwK|ff&P7gzk52{!L93}>!RzT>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk z>wxQk>wxQk>wxQk>wxQk>wxQk>wxQk>wxRP|D^-^{LJsg?pN+t|Ce96U$|emUwGW% zI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6s 
zI^a6sI`HSxf!}*BNay$W`R96{{yzMD_&Dq1tm}a5fa`$kfa`$kfa`$kfa`$kfa`$k zfa`$kfa`$kfa`$kfa`$kfa`$kfa`$kfa`$kfa`$kfa`$kfa`$kfa`$kfa`$kfa`$k zfa`$kfa`$kfa`$kfa`$kfa`$kz`D`_{ry=+Li!{Be!R9Zq(@mV&)YBA0`!|GKOw)a z{NC#oKmDh-LzG^vE&iY0zW)5)&yp?Re(=xsgSDlnQRTRQ{j>ec-@m{AWDB?-xF5J5 zct3C*a2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?R za2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?R za2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?R za2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?Ra2;?R za2;?Ra2;?Ra2=oyMExC9{@rzL`S;eNtd}SHqT7u)s@=%{)7p=I=+?(2AD4Vwavg9T z_&e%A)VRL3c<(pfZ~l(`#{Js;+Wp$&Le~M;f&YaLtgUz~svP%Y_v8PCAG@EqpShoT z+~hjoI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6s zI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6s zI^a6sI^a6sI^a6sI^a6sI^a6sI^a6sI^a6+XVZa0Zy&g%!=ba{ubp+m9djHDxVU+x0;=?d`^7ZdgGov zW83-h4N?V7ji-vceM&r;d+Q(O#fSg0Z{^bu%||&i;+v)_sEl9VXu_~x_MI1hXVJr-nZU& zTG@>79Fl)-eA2I%o%GQ;3qt+d_n9`L&7^7ZWNDx8F)w~slbnAQZ!$MN`0<^pZ)Ivc z^_W{%;<@I=vr;!QFOG9-S+vxh~@sG|)KG|G6XJtI~n(EDnH%>Nt z<+OP9%~n6b+<32Kvt2zko@)NXr^k~|_vLxh@!n>K^7vfI(i=^f9v3@5e(kJ%-fMYn zCF+?GKXu0P=NF8c7T-OUVNU#+>Zf?^tf}ya%J`WnhnW`dU47N8eP+iOCtv@1)6l+^ z;eIYoz1Z2Iea=ta@KpFkWjy7Eugr?it@fJ3KbRJ0`uZtHoeF=N8(%o8XZ0JaXf@Z(wW71dAM^QfuuNdnM)2=yXRda!>G3Y9e>L}?YPOzyZ_`oU&!IiUk1OM;YTl`gUtaCi zcQu(E-?MA7hN+<)Qg8&{&)oR3WcT}YYVaT4r}V>l@UJE4Ppi<5Rq(&6_!rer)1%Ll zc0z9g_cRmlv1WW{oFU`ilu*9t*bM08 z^iZDY;?#I*xWAR{HzSM-9G@8fNdB4OJ~*yRJXHzZTY&qT7RG1sM~>Sy<2lE5j$0DX z%|O3e5aOz{Q#UXT_dg|`3X^`F9>#T!Gcul4#*@*?BlBj!A2JdW$E|?QtPK5J+C3wY z3U3~vzOw&kCS?3x8T5_)Nc?M6{IO)1lDjJCv)t#ZFdj+&TZQ{vg>kbgj7RcbYVx!E zR;j>+{bUuMXBF-{BO&p@in!#X-c=!fSqZJWlZ;DfhZQA>)a$5InvnU&0*v1a zP(Jm5^MHjxFQs1;CnSEa!g#Ln!pYMhY%vD;=+Q2jL!uL#+8y#k&t+{3iGXsg!u3D(0^pUG!44G67_I?F(b@Fq(A%=eF>H;{I}yFo(cc?}Y&_i|@>LdKm&h?5&9q+hm3h~BnLNc>-tko#{b^}SyLKW&L| ze$X|?p zSeTG?iDI-*A>L0B+N}_NU4-(B;7|1vGM_6-Nc>xzka=kl;`>7AWKlxmjrs|RCyLN6 zET<69S%mSe2<=f6{6ze?C?WA=efUMOwCj^a(yncap?k#%nJ*P4WS&|W%H{LR`a~h_ 
zs}TNIl#uwLFd^e5+o>qrx9C<8$}JA%ivJfSWZWpm`!7t${G=!$>qEuxucB~&qF04@ zE?!@hkogzewFvW}V(425$}56i7m4m)TLix?lJ-2HB2kfg;0oy%Ww3X)Lw~72JGM;7 zxLT1&t;Y;l5Bktj-b1fe33)#H<@$+K9Mry*_-VhEc;1!?(Tx^~)N;@HEfS)eEfTW+ zRDtoJT_P24oLqtDXbByyKsoDS9BB!^Y?VmGnP*lcQuFQoJIH$;)Kc_i`&OZx-;&Ez zcU4F`UDX2bw<3|!)7}-L!w0v8zf~k8K4=x5KQ(^s+8+I#&(#X!2k!UV^~C?LXazrL zh4yTT_Glq`_hAd@Plb$Ahg9G_w-G;kyCNZROiRhPeoJYu@oglY+p4Ab<@X#fDrDUF zshzar%PkWUm$gjD`d|gdp&0x(N7`*crqn;F4*E+bo|`(B3mwcxf6h)woRSUy$xcYT zSSKOtT)C*P4s;+3{+WgJ9BJofb*<#9w})&gDQiGT^sa zGLEj7nUM9pEQ}AC;l3sAsE={1An2OJ0maaVGQ=fixV{waUxxcCMf;UW9C~Ca-eZ}x z=XIqC@vl-mZyDZa8T_vS@|8&cc)C9BzkcH3_log z`uZN?XA|S%M^zi7+Tq6m_eg&{=31$5i(N$L##}GsR;+s>HJ>_tTX`R6Z7TinxBaA_ zJ$1h5#oZexQeo>&SITqlyiv*zMsIqT_{YR)kDV5R$b&h z9-okQTYo3v<8is4&Ns?)PrF*$anePparNG*_atOFbz|w52kjyK>Z|>v-@JH}_{+Xq zie7cuU&hzrof4^a?Ez;z;v@VNBP`d7+zJ+_y2uG3kbcR^>l&mT^a@gk$AJoktDiGEeyjDFEY?(4Nv zrCrbMD0i>Kv8F#cW5HDuJi7W;-?qfi*aj1iFdx5?mMK$-N6^$CF5E_XUTW>Uh>|)x?S4slN%*2ZP!cs*B+0-pAQlL ze)kq>&tGqs{(kyR5>H&$OUC2!1EjxPd}Hc9k01J2D&9J}@CnIx)Cq}H9GY>r#2t%o zl=rydfihk^ccbXh`!~vaIOHzrN6+0LI{NOdsrYdC&wZs|=iZe_?LXXjgS6i>y9j^X z9pX>9_ek8)aGzBD-8See{r;LA;XivzJ1yKp^tSI!5>NiLL&`7SKIT60n|+U!`#7(c zlr!Q{=`Xh*jW}fg)OAgdxkKXK!MjTTcw#^ClZSfBc=GaXVmsb0e4RTaeyF%F)$b}c z?wj(9%mH^wKhHZnRqwt1pAbFXc@NQzWe27D`R=#$6+hVZa=E_E3-bO}946%++FQnf zm3<_h-n_T8)2a8Rbm`&#eMKLxI9mGahdW67Kl+r6|7YGJ{+7F^#P=udBYyn&jiPJ6 z+$!Vu&UeWDmfj=pd9R(N-1Bad{&k()?@isFlzR8NPx@1}-bnnrljuOt-K73QFOza| z50HF2^b$XBdY8mSyWSx8(fC$L|L#V4e@`DTYpF} z`k1ua%kbx;?v-&qyO+dCmz^v3_2PjN_cXX!^zwjx#lQOY75}~eR_Whg-X?zY^a(Pr zsB??>U!S|BU+r{{^yAZRNcE4$)_+3sU3{zPM2lm@A77T|xWCEMG7oxrZ;4+TO=I-KYulb`Ospl+boNx z<^$iR{(kX~t5(h1MdGbYtb;J#uaI&6(oGPzWQu>~7KxwdSBPIt>Lj{7BSXfOIfc?L zlUs}abxDM{E47}qw1M=m%FU9`pQ>NhwHL=_Uu_}Q@%TIo;}ZWZz&ic{tOxyu_Q*hd zxE$-<+-F#T^`Y6gei7Cs7Gd9R0oLCZV4X1s^O#j(Uqj~cOR-L|0PFe-u|KvrW+T2_nY_PLJ4IP(w+zvpJu*Yw z&3IM(g8N>X$X6u&_wa1Q%h{s0jj}QS&BS~z6MmS5aYgCIp+8I$d@nU z;c@xN_gww2CXSZ-&Bna0o{U#-<%rJQmnrl8?rjlgHWEFW*a-8shKR4T;Ac6)&&tI6 
zYlQu|GR&juC#oy||F&N$9-Yxl#;@l#K%CZ2;=d_fu>R0m`ty&ah;KSeI}UD#`9eL> zwSPAfJ-((X?tf#f_q3MxIkHni_ElQRJagAl(Z7>AW87~m@xb+kc-{t>@3xnApS`iv zU)V}?XuFMN{Ad)zeH2T(zSRZuzjo3;k1v(yxUgK_&zbAvKHH<8$8cX8V%?y1vfVN= zu4s_7|KIjYtrztxk@m`JD)YGFw(#Fgp;K)!Z*DE`Z(tYEg=0HNI~KH&IPcw!;csQ) zfA4lcKWZoI7q@ki^`tt$bK8h+kJ>`&+oyw!PfxX$>)zj7;`>oeWj&|cW_V6s-$~X7 z&TJ=fcA|~w{`1|@J{=L~Z7B60zJuIfzvj|UziNm1V|y6~>UKk1zaGk6f3@u=`yV5* zuKg|a<$LU#jf{(bPQrfbWa#_|?1N6iKEd}`w;qZ8oA01^mC(U2u}=CWbo48%Cx3|gb z&`_SNgAIf34wv!j(BareAA$1)BjR#yWiZ-jaJXL90f(YphK2i)^{AoYzWSslCd04~ z`8CcV)I68$^AAD055@ZUFsug;#Xia~j2|OHyUBXw5Ughp!8-1c&_1kJ_KAjI-Fg`I z*++!uVL5URX-K#~S)U(@a~#7$`LeD!9Q#Wn!gI;K;b4rLLqhp%cb3O`cz?2wVxLR) z9fk(KkahAQpX-3#JkH(K zlq35JqY!6I!an0DoI4qbe#rh^bDgeV{PvshT(X}xDlWt*>;p~+^-CNw3j2AZu#faD z?qhUlx1CcPz9VryXH0m08J}zJQ~cu_?C*@m{(DXRl7AxhF-M2@$NLn&`WE|*HTBCr z)JXLI8oyw>$$IWc=)gGm?P%AJ{bNo5c*luFJ;~I({MlR53;|r2>w15x;GVZ-4A&G6QP6SaUWB$e?J!cf#VRL zj)p#u4EibkX>8C7S$74Tw9O|ckPr`ecfcqYgb7$kx-s3TE7!Mtq81l>h z)C9y06N2uDfBzUyt;4qNiuvHd5@+UZDeFJ)^$6=tsde!^_Lg~am#q-TZ7%Wc;cX?Z z%U1wQOXuPSc7vJ6) z^OHSfeo(Kg%vT>hU*i57y2?DHyo>B3j@UfJZK?h0Yj7Wr_m=s{8wW|;ci*u_IEPgV+<~dXI^0s}X{v-Fs{HLR|-^||9?vEZU@8Q{m#KW&|CGTPA zmQr8OD`lO0@z$aJQaUrDlf3WKI?KFt%Ptas&uxcx+6wcdZqklNcae3VC%emfP1Dv` z$L=om71hMysrcrr9x^X4>@Mq_M|Y6>xVsb9`8uPYv=cu)G9l0R_!Zbs+f@AhkWQEn z?1gz)T>NnRuF{`&-(2cHY%kfT?yv>$o{3cWdH6ozxic`&lKu2|5Kp}Voq7Q}{UYMv z*P!z+KtGSsMN4!j!bmvd+@hWnHCikHLl$+?N=p?l9E&V4CZrpr0?=i|Jd<p?+RZe_*=otJm~D zUB32f*2nf^eXO7L$a$YPgPyP)`h~;^Z=wGW$9otP=4a9_!*H%|bcpxmoZv9TNmDTI z9*gspGcm7NhV$vO5V!q`czYhsF)c*9&c%Fe7UtVO!{6qG^M7*gXcpqGpF-Ry>uUT? z^zNd%twQ^2sI^w*Uxc?d9yBvO>!+di7 ziRap8Vx3@SxGy=M@>8f+&j0=#&P&Vu?l-JIWMSXFOva7N79x)PEiV0v=Od5tsMCgeMzg=lY{kI$0vuVw3yF3RgV#__A$;~c_5oQwYr{+FFTuAj6J?Y7Hx4}ZZX#Rc#gTQ#HZ)9#=a!? 
zNq9~;PvW5$TjRVQ{e$OL^W?ctXoG#6McBVs1wYP{_15#-Nj%!667!&+(Eh7%?m1K9 zntJ@sa8~%9Nc1%m`%#sczs$yd70+>I;#^fF=FhV*KU#@-W0u5mm6Nf)@iY9l3jUOd z_4P@l_l%o)!%2z{B}0tohp3KmmQu@&I$jF_#gx4P_p3P zQ^NOiqB|KVH%s>EUz>vU!r7QNufluE4$m*^oBV!n72ZP*<_A;Yzw`0@8Q90jf&WiI z|ClSHSawGewjMAI&P;>Zt8saiAMyS ziUa%Z8E|TyathWZ_BmYYzoED658kqe_(30>+Zl6~_+Qt}vF|z_dRU2mIuYw^)ctRu z+v6|}d;=XG58eG9aoi7anXgVk{5AzTlZQAV5BvXlICoqZG`9}@OeB#s-6 zb-*!UeOT%l8<+1!_^1q@2>THqU|s)x+{arOAKt)sc9XI0J{kK8uVY+#6Ycvp#<$l) zdD0GV;{M-5-2Eo58-(wMUqd{)81+pL@vy98S7N>SN4)PJLOd(qzki2)mmd))eH+#- z<@>D(Vf~!zi=s0%>zOjo7$4R#Wxg>1^Mji8Q5k2)BW@axICvb|w$?L&y6kTZz`FlH?8kh7`Am)f$h@*2#?g<_KK+B0akD?-uTSCE zA0V#$C|oD|EFU4BtSLvn1L%)+vYl6E)>X{P7XiO+F6oBl|x!{fSuiD{9(L=FK05c1Y>+uOFlTzmK@7 z=6?8md@hzpo>aKPTj87>&gD<&jCxw)`k3Gi5_0}#2__bL;J7U*+_TWIp~x7p(8M!a962*}v%84*M}JW&Sy+N2n*Y zfBIoe&L2+SM9QzhpWiL}$m(`DVi|L%x;TcN!=NPKl_&grxgeqN(+AE|Ss4K~2MxgFZG0`rJ&G9PLglXHMaAB*q7T7>Jx-(vxbAGg3f zuYE%LS9^RP*;M>xD|}~@0snXu=U2-%2<4>u)#45C-eU4xC+>!F8)1K~0{*puoDZ7W z66e0!2tFZ(eVI0rZ_^eMC(iDLICnji(;V}v=2FiCtt7td+Z5;UI$(XMM9v=_(FpU% zrn0Xw1m&I5Ui@(QdNRLxz9tS$#s5E*3cty@vL2INA@iG|O~Z4i<^{c*%YEkbLLA*M zjE}O8IvnfO--Pi&)(84yyc~hJYit--WSy=*#`BROo)EqJ2Wqouw;_|U!T&0dnyfz}7i(&id z=a+slE{w0T&Nv{P+mUtY@pvEahx%pxdN|f~$A)o{??>XunsXhD1B4$C&fUnkI|1t> z{jsk?8+g}`OU?`t)G~*#TXEO-n_ZYPA55d1>zA`ABXOVML--Y-u zHNX93P<8=c|7V@q)DLr-&E6!FiF%A+C_`kUtIcQhCm4VLw9JcQV$k#=tKp2i=hK zIbXoP24LOy%MkDI{mA#}{X;&9k49sDG$!bZthNr&&~39J>zoTr|d_56#SRvWTgJ}5B*ZU^L;O1 zj#s=NTFwjj9yovC{cyg<>p5@Xb+o)cmc#eY{47sf{hTb14$P1O2tea(|Zfa35Iw`!$RYHRZ`T_6GF&jj*1?eO&2( zZ-;XUERXxWELU{-&2T>~m;2E&?z|b!W$-%D-`4|AEIRc@cy5-%=jJ{9z2j`o4LeeBZpD^%1i?wjVL? 
zlhSr>aZq;uS2|6%`PdDKnzBc@X?BrX^j`ZfC{`xkYM{gnNk z{h9G2%cm|do$&?xAJeIuOlSFge&%O<&FACw)B(nqykFiw#~YT<{H%xVD&H#(!oKr+ zpr4AD>^hKkJwN*FVHZ1%K!S8@gLw{Qs$`yUDDPNwSzb896f9KSK$>ea}U(P|z z5Bof_J~}z<%ZR@&L_ePro?rZZ2IkL;<(&58oVQFyyG#W?2XR2n^UHi-8rCBgVE=7O z$R}~d46G;cci3v`m;O5w`+JMA{yG`^ag(vnF%9$N`Qe) zbFmLOBz*56>je|=T|ix&U;jF+BXONb#`!#)=N<-suc^OPYND8rb4(-9ZX?4!mHZA< zJ)FZDfp!}S|D1^D$dmQN9R`Q`MW4rE{+TQ1&EFe_ahml^!27DHU;JVu{B;6szO4Ul z#(q8ue#7q?a&bOqF#K*rn9s|5svGJTJs%O)spR}k-rBg-)#&>yVN|teCM4X z-k<352<%&qgKpNv_qfBLfBYVBJoY{6;r!W%uudg$!33PQDa3m3@bLYV%x@-zb6WEI zg@Z9~9f|deiO{n=oWma$-XHfv<#(*=;roOU==Y;=j`4e}_Y}zb><_~+-i(2t@Vrxg z*dLd1V+7_I^$2LO3t*p_|)yde$FUI%g!?Er=I_ROqjRjce9~{=t zsI&6jX+50F9~RbEnJ(jhAnvo67TeHT$Dn*OGZhfB4-*^sn)upEG`y@n&>bpAo+*z`4Xc?7P&% zK0`y<@3^5M&dufGoK_yrS$0H!#qaG+X(s2T_NkKovtKj$eqq}T`8|h2wvzLsLwA<_ zgt3jFm-*O#;O}KMz&=YI`QCf4#^U!2@jH`?_&t9PewU5EhggXBUncb}Z!P;d#Rd2t zs2TLH6#GJTu}@J)_CcvY*kjv9#x% z4dfitBU!Q!)1t1Nqv=$JcFD#5MmEl6X5qX42KbJ=7{7PW9P56+%6i!7M&b7qHN1L}l1_D!{&5A?j%;?RQvAe(z!D&e)eK zmHogo3bCJ86#Pfx&EnAhsrYrfhB&ue2je6Cy#T-GfZsREEs*|S{W~J5eW-=_F6fmK z_(yJd|EYDgBg^G^TeZgf&&BUD6Oh`3I|bsMg?FhAE>INy;t?6c555_fRl7&xQ{P!{Q$&~Uxx3a z>7ShMbKWfXJrV2ulQ8a0#Jb}ojQ_u&otELe$}+^a%MhP^3|;y(?5oN5zn@~>^i{B2 z&)~X+#IuueuAk@VIRBU&<^!THqu|f|aX*|F*L=4t{>bm+$6$RgALC~Mbf5)(f3r&B zxUuDk|FZDAHPnT=7$=)U7whA9cFJ+Sq8#H)4$kKn%6WrDJP%nV@y48(jDLd~Adadp z=TEv9;rs}Hr?myXcWo^DPCN7WAPb<|x%hsxF~+}^7!Ua!NHNZn^LJcxaNitXa}YO_ z$oY-O%jNfJ#+Ty!LIZqnk%RggV*D$_{H#pct)dP5t`L5ijr%Ep?($q{J&gC+_`U_> z@N0Du?_}ZkD0AgGwknZ%MkUU*G-!zS$;Wfo!FLGd_E*L;Tqc@l^@N|2+Keaea(W{GIbWoLesm?@9JO`Fr*C<-J$b{LZq>H&$UjQD6M& z?NW&^K5H)H%&YBiAN3G-lwjPdgYmEwzpqpu?OKfS>sQ37nCHFT0Df73@e%RYgQbXL z_HFiE*z2^W9R3JND@={bS2g=u0bHmxtd~EJeImAO2PI9!2kSB~EL* zA?Ato@V!h4emA==-XDIixBBxDj_g4qMlU9s4v=rY#Gv3)0 zaY-4@C$&U8#Q3Td{!)s5T8Q`*-$fiy3O`>4{menT^6!oC_hgHu+++DW$$1j*KG+C; zvI=oM*8z&*cbNCqX^QuogZ#yK{{^_OEZkQvY$oFHB7EP(xU)fsyHew4w}$BdMfiTX z6zyDwc)JPa#pRes6rueZBhJkWekkj5nTY53d$+~%{w|?@l_Sn=iGEXpd3gbTw<{0# 
zS06f7FT|fRE*8uE9p6&k%eRf>yxRFKF%K%mJmOcGPu4CVe8T#KZSU36&<4Y#`e;MXI<)SOqzsD^5)Va8>=6B0w+%AQl zmSg^viTOYw=Chbb4dePv0oD=tT~86}E5Y|YO=Wy~sUG@g8RotC{q>7u(9LZ4NjChl z5%^`8ck=gq>!Lq4!TZX>bJRt9<;#0JvIO(B9K_Q#^M+JB*0~(_Q&;q9dNcUba>T>U z;I9S9*HG#!Xn^-r2lKOh=+AG6Z}OoJjiuf8YmM=v82*_l`p~g4<`E6SZxhBTiQDp} z9fz@BmEu0?!w<@lKM&(s&F@Z299)8VBEGjfxeR`si}9r${G%NHo*ndB)>%0&mtdUC z#q$``HV=MZANt=Oe#zg@&qF)p z;QG247wSXz`McG1an7b#-qW%s$X5scQjGDd4E-k;^HBbteI3LD^`YnWp#ycpc#(?N z^Pm@(Rv?b!?`7vguNdd>cl)OF`sXW_Q{5S7DJb_;0I+G?=tWn_`B$c3o3J^-%KeF{aEG) zbueBx#r-2b?N%;&a5sNXKO67ADaL^Ud{5jMI>hlRU!HT581#VaX_?610C8)MtmE9< zB#i5_{#T6eo9c%5B;yv>zq6r}o1tIQ|JeUCL|@7h((jIHjdst#`Sk*f|8+2K@%L9t z!+KO|-)d@O>6a&#;@mg?en|n|S2p@N;&owG9NAq>rApAP#5b_O%Z3*$2d`p`B+IfKQH5H z0miX>v{yZ}b6uG~Y!^eE*8sZ7zkjhR{GF6k|I9BzIYo$%_;)VKF~0D*n#p@!xf%Lf zL#&f$LqD>h|5=E~IerzxkDEhB3(#J`y&khU`XT=w4}X_6L;SpX1FUm${+N$`lZ)RE%|o1D9&}drWpYt&9`rRA z{gQv5umJr&7x!5x`fyffd}m*ddgLI?s z?_NFC9Qu_Hzbn9V){}AQyF!fPso!t)IkjBwcegFE?%NQ4 zlPS+Ny##SX7W!oq%-fq_eB%7OA@rpl=52N1ANA3mSf9VUOx7XZY$Ex-!un11@BYd@ z4!>8)Kpc{Xb%`cHZ)LwYAM=!aX~zR%q6?Yj=r=|1i%j@eG4#G1^Z$y_|76{*0oE6) zuuqqVep(OLWgtGrI?9_B(r-`U??`82KY{xY{JUuNLmVpSS8@>lBhDOCh3_*<(BDcB zR~MjP@$YAGzMp~kBr~i#$T_>Z=udUvpLOvb^PwNBus(?RptJ<X#e`M z9@e2d#w)bb8(n1_sw%;I$ijRgOZ=g6L&OJ#*jH!)f8pPsfFw0p=-n@jP|Ix`phcH45W_>=!r0 z`zgnGioa)deLKWcCAc2@fLAp_zOuwQN1ae|Lg^kE*Y2Nr)b+j3JfOoKnR{gRe762| zC-lB?ziyXiWW=iFG5If+`oCLm2XO>%hkr&pJXR>r>DJpJ*#hnd|13X<$>aL?@9mIk zf#2&7t{37s-VXm+JN#aMTif~n*Y9m@-@liak@~l`+Tnlg4_=?VKeof#dXHX?wCi83 z9q0#c<^BEB+TpL}umA1+`ui0>|8LvTulIILwt)M=+VO*bnlAbCir;uU{I%L)UF+St z_Lp`2{D1ZO*Y*4RtJnWmfB)@E^Y3@9pgET-^fhXV6phhdx?9b$OzZlxi-9lRZqE#Q9Oe&BxK z{lIm=b-;DNb-;DNb-;DNb-;DNb-;DNb-;DNb>Oe41L_Z}ADH~Z>P4wPtX_WOVP_uH zVUG;|&s)G-z*``?1rDrRt$dw3c1ry_^Wb!NNIE<;9Uhhr4^M|jq{BYx@W^y{R60C5 z9UhYok4=ZirNiUX;R)&R#B_L4Iy^ZYo{|nvO^2tY!_(8@8R_uMba+-eJUbnplMc^K zhv%ik^V8u4>F~mIcu_jMI2~S+4lhlIm!-qY)8Q5A@XB;}RXV&n9bS_TuT6*7rNisf z;SK5V#&md7I=nd@-jWV)O^3Io!`stg-*k9KI=nL--jxpTPKWoT!+X==ed+N2boju5 
zxyk;!QKybuoOtGgwZTKR!Naw|BelW5YlBB?gU4!v$7_QpYJ(?hgQsePr)z^}YJ+EM zgXe04=WBx(YJ(SRgO_T9murJpYJ*p6gV$<<*K30}YJ)dxgSTpfw`+rUYJ+!cgZFBK z_iKX>YJ(4JgO6&1ezn2JwZSK~LI2udKy5IvHW*YJd|DfPRvUa?8+=h4d|4ZORU3R= z8w{=uhSUZ_YlC66!SLE(L~Ss#HW*bKjIIsF)CS+w24ic3akatt+F(NT$oR*mq<72Ojy{5Bnamkz&Ahd-plAJgIFbT}m)R;I(L>2O*)oSqJ6q{Er%@TYY6b2^-r4riys zIq7h2I-Hjd=cmI3>2P5>T$BzMr^6-baA`XHB^@qHhrg!7-_qgobhsiNu1tqj>2OsV zJS!tJBO{&3G)$IZvJI1Cm^y~ZHB6pi>KZ2BF!c;mV3nVwh6Hlo_Uh zVag5D&@hb*)7UUg4Aayw%?#7rFf9yIVVIVNX=Rw0VOkrejbYjvrk!Eh8)iMjtZ$eO zhS|U{8yaRK!)$DrO$^h~Fq;~tlVLV9OlQM%G0f(M*}^be8m6mZx*4XsVYV{N)`r=} zFxwhtJHx~clQ2vV!)$Mu9SpOhVRkai&W72=FuNLNH^c01m^}=$r(t>;X0Ieu6Mvtz zx5B*?-bdly3h%4%ehTle@V^v3K;Z)wK1kt%6+T4aLlr(u;lmX^Lg79NAF1$B3LmZT zF$y26@No(sukZ;9pQ!Lj3ZJa-DGHye@M#L4uJ9QOpQ-R!3ZJd;ISQYv@OcWKukZy5 zU#Rd!3SX@7B?@1v@MQ{LuJ9EKU#aj_3SX`8H40y=@O27bukZ~D->C3S3g4{oEehYN z@NEj;u5e$4?@;(oh3``MZiVkr_+EwYQ}}*`A5i!~g&$J*VTB)2_}>aYs_ ztMGdYzpwBI3V*2ZM+)~-_+y1XQMkXt0~8*p@Sr5Fy&iqmr-u2=FrORd3&VVAn6C`; zwP6MuW{6>i8fKVbh8t#tVMZEelwn3EnOgD27=^!4c&x(X6dtee1cfImJW1hi75+}) z?-l+*;U5*Atnd_tE0Z|QKc*UHnqj6JW`<#A8s;a%{A`$6hM8@cIfj{Qn0bboZU&u5}mSM6D zlVg}VhRHQdo?+@5Cf_ji3{#L~YW3(sg^Ls}R=B>xB?^}+T&8dXh07IgsBj~N8!OyI z;id{VQ@FXpEflU$xTV6a6pks}TH!Vdw^g{E!tE7aPvP|y?x64n3U8?JMhb7N@Fog( zRCrT`J1M-G!krcFlEi6>y18MtFwB;Q>1vp6hUspYtqilZVYV^MwuafxFmb~q4Aa9f z+Z$#F!|Z67oeZ}#0)470ys z{$-d040E7i4l>NahB+k3)Y8*K6+TSi!xcV4;XVoAFc2)3LmTRaS9)=@Cgc^ zsPIV&pRDjH3ZJU*X$qgN@EHo9sqk3}pRMpY3ZJX+c?zGe@C6EAsPIJ!U###Y3SX-5 zWeQ)e@D&PQsqj?_U!BBhdU}mvt~JbchPmD_HyGwd!`x(;n+=`G*aJjES`xzjLr8Rl-o++&z~4RfDi?l;T>hI!C14;kiR!#rY`e;eje!#rk~ z#|`s@VV*S1Q-*okFwYp~S;IVMnCA`if?-}X%u9xO*)Xpd=2gSIW|-Fv^M+yGG|XFu zdD}4W80KBWyl0sA4fBCvJ~YfnhUsURj}7yQVfq_pfMEt2W{_b%HOyy*`P?vH80JgE zd}Wxg4Kvs+W~yPP8D_d+W*BCsVSX~q&xV;*&8&^FD_KVR_%++qHODY> z4KvR$^9{4WFbfT{$S{iyv&1k<4f9Jiv)0pQ&(`!r2PvC|pP3T!r%#uB&jq!u1p`P`FUxB87_;uCH*3!leqADcnHea)lcz z+(_ZZ3O7->slv?^Zmw_(g)0g_+dxh6iczuOCD7=Bf8!Eh! 
z!W%2RiNYNf-c;dE3U8)xXN9{cyt%?#D7>Y@T@~)8aCe2bQg~~Hw^4Xog||~Uu5d!( z9tv--@D2*^sPIk-@2v1H3h%1$ZVKD%cxQ!oQFvE{cT;$G zh4)Z+PlbCbyqCgzE8I)reH8Ak@V*M~r||v?|4ZQm6h2VlgA_hk;X@QYRN=!EK3w4= z6z-$&kqRHB@X-n%qwui`AE)r~3ZJ0xi3*>j@W~3FqVTB-pQiBX3ZJ3ynF^n!@YxEV zqwu*3pQrHo3SXe`g$iG!@Wl#WqVS~(U#9To3SXh{l?q>_@YM=mQ;q+xb&zBKGQXOY zz7}-;1^@h2@FgF_JCd1rpm|Lou|Eo3sK3sa+-fCLlHpAR*n7)R&!!UOm z<}SnBZJ2uubFX3UGtB*ldB89a8s;IxJZzXp4D)ZpJZhN74D+~Qo-oXlhIz^`PaEbL z!#r!4=M3|_VO}uIi-vj0FfSYC6~nx0nAZ&Rx?$ch%$tUJ%P?;n<{iVlYnb;8^S)s| zFwBRB`N%N+4D+#JJ~2#x!wfLYK*J0&%%_I=%rKuD<_p7oX_&7J^R;0H8)k@Mh8kv= zVTK!KgkeS+W|Uz@8)l4QzA?;L!;CY`c*9IE%tXUXGR(Jz`OYxk8|DYY{AifThM8iR zO2bSw%rwJHH_Qyf%rwkThWXhrvkWuaFmntu*D&)8Gv6=^471QMiwv{aFiQ-x)G)so zW|?7rHOy~@S#FpWhFNKtD#NTYOhz3YgJo7Tf1eo0pTt|hTVPFEAbqxwWpp9iFgb>) zW0+jS46~(Sx*DdNVY(Y;E5mGUm~9NRtzot^Ox!RD!}KuB_J-NP zFgqG%C&TP)m|YCBt6_FC%1CLGs+l#>@IPywKhFwf`Y6}i zwDi7)+0QWh8|GhzIlwRn8s;Fw9Bi0F40EVq4l~T*hB?A8eGGG?VU9A)(S|w3Fvl9^ zIKv!om=g?hqG3)l%*lp1#W1HD<}|~cZkRI+bEaX=GR)bAIma;P8sTIVm>UdpqhW3`%*}?m#W1%T z<~GCJZkWD?xx+Ab8s;v;+-;b940Eqx?la8&hIzm+4;tnn!#r%5M-20C!#rx3#|-nh zVV*F|lZJW9Fi#ui8N)nlnCA@hykTB2%!`J3$uKV)<`u)dYM9py^SWW)FwC2VdCM?w z8|EFuyla^E4D-HWJ}}IOhWW@a{S5Q5VLmZTf5Qwg%s|5oGR&uj`OGk%S2O?X*eX3z z`NG`OmxlSuFkc&HuwjN6W~gC?RWs}M`G=c(8ey1`h8bm;(S{jgm~RX-)-dA?Gu|*0 z3^UO%lMM5%VZJlW_lEhwFh3e*vSFqerqVD|4KvL!(+xAjFf$GFlVN@~%q+vqHq0Eu z%r(qB!^}6#0>dmc%p$`qHp~*kEH%t8hFNBqUk&q{VU`Y?%6nDKSi`Vag2Cz%b>8X=s>6 zhG}e=CWdKhm}Z7)ZkQH^sW41S!?ZF?%rLDD)5b7u4b#pr?G3Y@Vb(WH2g7V&m<Nd}f%>4fBOzzBJ5N zhWXksgAFspFhdP9%rL_ZGr}+<4KvCxqYX30Fy9zvtYO9(X1rl07-phjCK={i!+d9$ z?+x>VVSY5sWW!7`Or>F_8fKbdrW$JY?vj6S!$SH471EIzZ&K@!z?$<3d5{4OqF3)873o7$Mczn$udl~VR8&p$1u5u z$umq{!{i&Lo?!|MQ)rkX!xS5)zF|rXQ)-wp!!$5VxnUX_rjcP98>We2ni{5=VVWDJ zg<&cT)6y`l3==aHC< zm@5r)m0_+n%r%C&)-cx@=6b{2V3->XbCY3iHq0%Cxz#YY8RmAw^fk;KhPl%)cNykx z!`x$-dku4+VeU7~1BQ9fFb^5#VZ%IPn137QQNuiDn8ywCgkhdE%u|MW+Az-;=2^o$ zXPD;=^MYYsG|WqedD$?p80J;Oyk?l!4fBR!-ZacxhI!jC?-=G?!@Osh_YL!bVLmj> 
zM~3NVn2!zfiDCL1W`JP^8fK7TJ~hl|hWXquUl`^~!+d3!uMIQUFhdM8)G)&gGu$vE z3^UR&qYN|JFk=k!jbX+bW}IQh8)kxGCK_gvVZJrYcZT`iFh3aPN5f1u%oM{^8fL0t zrWt0sVP+U+reS_E%+H3IWtiE9nPZr_hM8xW`G#3wn1zN}WSGTJ}J%u2&l8D^DXGOB->C4E0P(=b_v$u>-mVd@wr*D!g8scV>g!_+fOfnf>_ zQ)HN8!_+rSiD60&Q)ZY3hAB5pL&G#OOk=||F-%j#G&4+d!?Z9=g<)D6rj=o0hG}h> zHil_yn0AI~Z*Y-pH`470IeHZe>`!)$7pPKMdcFr5w4#W0&2W(&h? zX_&5t>1LSjhS|z6TN`E@!)$Aq?FQ0hI!dAuNdZ4!@Opg*A4TAVcs;%TZVbtFz*=V zUBkR*nD-6yfnh#0%twalXPA!-^NC^l8)krE1{!9NVLmm?XNLLQFkcwvOT&C+n6C{p z*f2v3Gt@A{3^Uv?BMdXrFry4J+Aw1b^NnG~8fKhf#v5jWVI~@8l3~6z%y)+Q-Y`EH z=10R!Hp~>mR2pWgVWt^ox?yG*W~O0&GR)70nPr&ShM8lSxrUi%nE8fTV3>u5S!9^S zhFM~mrH1*%Fv|?{t6_dK%yPr5Fw9EBR2gQKVKVadYD;F4sXoP-(Rrgz9k)2~tSp7I z70yw(j>5SL=P6uQ;e3VbDO{j%p~6K97b{#};Sz;Q6)sb_fx_hqH&nQh!i^PfqHt4% zn%J!uu(_zrz1g_yC0uRQMo; z4_5dPg%4HuFoh3S_y~pjD14;CM=5-?!pA6ltis1Be7wRZD14&ACn zN3pJX8_tG6fCNGUM2O-JNeJ%l?(Xiv-QC^Y-QC^Y-QC^c3w&qK=6-%_&6!zq*33R@ z@gLV+MY{U!bXUDq-JPydx=HCSrH7QBQhG`0Ev1i?zEb-AD8C$U^fw-0JkWTM@nGX2 z#zT#V84oueVLZ}!l<{ceF~(z!$NeyVKgLU$AZ4PINm3?DnIdJXlxb3?hZlRDVTP~26%C;ZnmuK{L;~mC3jdvOEHr`{r*La`te&Yki2aOLIA2vQ>eAM`u z@p0o5#wU$W8J{*jV|>>5obh?%3&t0XFBxApzG8gU_?q!`;~T~|jc*y>Hojwg*ZAHK z;}!M3lm}8CN_iyZv6LrLo=SNp<++p>QeH}VCFQl0H&Wh8c_-z)ln+uqO8F$^vy?AV zzDoHf<+~JzX!bGvLrMfG5v4dvi6kYm6elTBq(qexO^UM=7b(%D#Q0Huc|CPCj%n;> z>~0*(IJR*dV-I6bV=rTG{4<_$tfk5 zl-%LvpI=4&y~ck&0{qR$Bcq&GNwg{2gcQdCMYDaFIfe}1q3>q|L4 zCFHhBN+~6!w3ISZ%1S9GrM#31QYuQRB&BkA`LB<_f4=6gd$uY+Zt0gjT~*_1#?_5$ z7}qqeWnA01j&WV%ddBsQ8yGh?je8jPH11{G+qjQ$U*mqp{f!404>TTRJlJ@M@lfMo#>0(A z7>_g_Wjxw=jPY3GamM3~Cm2sOo@6}Pc#82<<7vjzjb|9oG@fNV+jx%gT;qAh^Nkl6 zFEn0cyx4e&@lxYu#>eAxJi@loSr#>d0`pYLsdFXi#ri60|~xy?x_r=*;g zaz@HoDd(h|mvTYMMJbo0Tn;b4AKAah2>g2ddgaF>^~>?>RpV>M*Ntx&-!#5ueB1br z@m=G4#`ldM7(XJ*f<9EjIjXxNFH2!4#+4zg` zSL1KS-;Eud`TO&SaRlRt#*W63j3XO68AmaWY8=hj+1SN6x^WC+SL2w*ZpQA$v5aFI z$1(OW_B8e~_BM`d>|^X}>}Tw69AF%19M3quaRTFn#)*s*8z(VNYMjhCxp4~Pl*Xxy zQyZr-4l)il4lzz^9BQ1-IK6QOMmMi$<{^97Zkd(qwibyFcrI?iBQc8rE|K8UByRFT!Eh!^V 
zN=j)dWu%mqQcg;FDHWttlu}7bWhqspRFzUqN_8nUq|}sBOG<4ib)?jlQcp^KDGj7F zl+s8_V<}CfG?mg!N^>bKq_mXMN=lfN)>7I?X)C3jl=f0ONa-l0la$USl%NEs+)kd(nvhDaGIWtf!VQbtG_DP@$D(Ne}p87pO+ zl<`s~NSP>Ql9b6(rbwA8Wtx=fQf5e*DP@+F*;3|6nJZQeH}VCFQl0H&Wh8c_-z)ln+uqO8F$^vy?AVzDoHf z<+~II7ki%W4=E9(M3mwvC6biLQkMcuMh-;w>ev6dx(RQv9U&O9_w?C?%eh_)-!`Nhl?el*Ce!NJ%OsnUv&GQbL`qsIp;FRGNiQXXl#Eg`g_r;BV=Er1WR`8pA|VCIsUoGSlxk9{OQ|8Hrj%MzYD=jjrLL5EQtC@-Af=&{Mp7C}X(FYm zlx9+zOKBmcrIc1u!lbm8(nd;KDea`Rm(oE>M=718be7UZN>?e}q;!|kLrPC6y`=P( z(nm^PDgC7Mmoh-gKq-Ty43;uP%1|l8qzsobLdr-fqojDf6Vvm$E?0LMe-+ES9oF%2FxIq%4=RLdr@h ztE8-!vPQ~UDeI)Hm$E_1Mk$-5Y?iV`%2p}cq->Y6L&{DmyQJ)vvPa5ZDf^`CmvTVL zK`Do%9F}rK%26rDq#T!WLdr=gr=*;gaz@HoDd(h|mvTYMMJbo0T$XZ0%2g@Xq+FMB zL&{Alx1`*ba!1NtDfgt@m-0Z$Ln)7>JeKlA%2O%Nq&%1MLdr`iucW+|@%2z4hqVl6Q;L@qZz*x5_(<`U;wQylN`RC=DeVAwq$HP;LP|<0sidTql1569lwc_#QqoEZm6A?MdMO#CWR#LgN@gipq-2$n zO-gntIi%#2l1oZ%DS4#im6A_Neklc{6qHg(N?|EQq!g7>OiFPnC8U&;Qc6l`DP^RT zl~PVhc_|g7RFqOlN@Xckq*RqsO-gksHKf#(QcFs0DRrdOl~PYieJKs3G?davN@FQa zq%@V%OiFVpEu^%R(n?C0l-5$(NNFpjos{-cI!Ng#rIVD-Qo2a#Dy5s0?oxV4=_#d` zl-^SMNa-u3pOpSm21pqwWssD?QiezwDrK0I;ZjCO87XCyl+jYgNEs_-oRslWCPr!q=xhdtAl-pA7NVzNJo|OAi9!Pm8<&l)fQl3b8D&?7!=Tcrsc`4#e zos{=dK1lf}<&%`pQocy}D&?D$?@}CM$oU^B5u`+v;wUAOl*m$?q(qStRZ27|&Qe^Y zM3)jnimQ~EQrx7tONk{Vwv;$hJfwI^@si>#C9V`7DZWztr1(n-kP;{*o|O1f5=coX zC6ScGQj$nXDkYhedNhu|jl+;qvNC}b>EG0xrS}CDY(n(1#C4-cVQZh-&EG3JS ztWvT`$u1>_l$=s>Ny#lGkCePp@=3`rrGS)zQVK~aETxE)qEd=UDK4dil#)_PNhvL* zjFhrc%1J3NrGk`-QYuNQETxK+s#2;+sV=34l$ug%NvSQRj+DAm>Pe|DrGb=&QW{BV zETxH*rc#w-l5$we5h+Kd9FuZf$_XharJRy-TFMzIXQiBza$d>>DHo+&l5$zf6)9Jx zT$6HL$_*(urQDKoTgn|Ncct8ua$m{=DG#MQlJZ!}6Dd!nJd^TV$_ptk!;Af!QLkjJ z*HYd{c`N0el=o6TNckw`la$X=zDW5h<(ri6;pKn((;UBV@AnA&9)aH@@OuRQ`3O{W z4F7wa!9gLRVGge0XXgJye?SqSh>#-`35pCkK~bQnP&CLHa)F{lF(6kcCgcXWL$RRP zP#nku@`St~ZzwM01NlOJkUtav1w!$l_)r2UA(RM83?+e*Ldl@yPzop|lnP1>rGbK= zU?>Di3xz`Ip!84%C?k{!$_!`)FUCzK1y4dsFILiwQlPywhQR0t{z6@iLE z#h~I)38*Ag3MvhifyzSVpz=@!s3KGesti?uszTME>QD`+CR7Wm4b_3_LiM2fPy?tT 
z)Cg(}HG!H!&7kH`3#cX33JQZ-Lv5h8P&=qS)B)-Ub%HuWU7)T|H>f+*1L_I&f_g)J zpuSK)s6R9S8VC)7217%jq0lgBI5Ywp35|kALt~(^&^Ty3Gy$3jO@byvQ=qBPG-x_B z1DXlVf@VW=pt;aIXg;(6S_mzI7DG#*rO+~HIkW;=39W)wLu;V5&^l;6v;o=(ZGtvK zTcEAbHfTGv1KJ7gf_6iDpuNyOXg_oSItU$t4ns$vqtG$vICKI!37vvYLua6~&^hQl zbOE{uU4kw{SD>rVHRw8Y1G)*_f^I{1pu5mL=sxrSdKj*MKHK@{Yy4fmOYWa<`R_G; zkHFt|1pYd+@CeV_$Iuh#DfA3_4!wY0La(6L&>QG2^bUFteSkhfpP|6aN&qE<5xX=$^qqsazVMFJWyUJACw;|02PD^L4~0rP*JEDR2(V+m4r$`rJ*uV zS*RRT9;yIUgepOmp(;>Ss2Wr~Tz}vF-|sE^Jp%uH1pYcos)66_noupMHdF_y3)O?_ zLk*yYP$Q@@)C6h@HG`T%EufZAD<}+V4Yh&VLhYdTPzR_Z)CuYgb%DA<-JtGJ52z>9 z3+fH^f%-!Ip#IPRXdpBQ8Vn7AhC;)j;m`@j@$^d1A zGC`T4EKpV`8%0OkIa!`4w0#p&I1XYHrKvkh?P<5yVR1>NN z)rRUob)kAteW(G{5NZT9hMGW4p=MBXs0Gv#Y6XQst)VthTc{n>9_j#fggQZ;p)OEY zs2kKB>H+nHdO^LRK2Tq%AJiWj01bo&L4%aLit%O!VtD!aF z`uEsl|LeB$uf6J8jKw-=J+uMZ2yKEkLtCJ&&^Bm0v;*1+?Sghgd!W70K4?F506GXA zf(}DRprg<+=s0u&ItiVEPD5v)v(P!{JahrN2wj3MLsy`y&^72fbOX8x-GXjIcc8n_ zJ?K940D1^Lf*wOppr_C?=sENPdI`OPUPEu7x6nK2J@f(k2z`P+Ltmh;&^PEiQ~)Xn6@m&wMWCWkF{n6H0xAiWf=WYWpt4Xo zs612wst8qrDnnJEs!%nkI#dIy3DtsXLv^6KP(7$V)BtJ-HG&#LO`xVwGpIS#0%{4h zg2JHIP#dT%)DCJ7b$~iTouJN87pN=L4eAc{fOJJTo210|N!O#$B zC^QTj4vm0DLZhJ3&=_beG!7aMO@Jmslc34a6lf|m4Vn(kfM!CopxMwIXf8Alnh!02 z7D9`l#n2LHDYOh)4y}MzLaU(F&>Cnhv<_MiZGbjHo1o3m7HBK94cZRvfObN=pxw|O zXfL!6+7BIo4nl{Z!_X1vD0B=u4xNBbLZ_h9&>83~bPhTXU4Sk^m!Qkg73eB-4Z059 zfNny!pxe+L=q_{*x(_{o9zu_x$Iuh#DfA3_4!wY0La(6L&>QG2^bUFteSkhfpPeuYCtuiT2O7M4pbMa2i1ofKn(PpB8v8|nk~h5AAL zp#ji9Xb?0Q8UhW4hC#!j5zt6z6f_zd1C52oLF1tb&_rkwG#Q!#O@*dG)1evAOlTG~ z8=3>nh2}x?p#{)FXc4p+S^_PFmO;y*70^m(6|@>!1FePDLF=In&_-wzv>Dn0ZH2Z$ z+o2uMPG}dj8`=Zyh4w-Fp##uC=n!-mIszSqjzPzv6VOTM6m%Lo1D%D=LFb_h&_(DH zbQ!t=U4^be*P$EGP3RVM8@dDCh3-N3p$E`I=n?c7dICL#o8FGQ5Lopy%C?@0v zxkIs_*ianE1M-BtAa5ux7evb1}Gzx3Cav*fwDr`pzKf%C?}K)$_?d#@ATjLdBrsPzk6cR0=8$m4V7a<)HFV1*jra391ZLfvQ5)!u9LtR;y!a4X7qm3#tv( zf$Bo_p!!e)s3Ft{Y78}jnnKN>=1>c$CDaNEgIYsvptevus6EsH>Iij$IzwHcu246q zJJbW}3H5?{Lw%sWP(P?YGyob14T1(kL!hD1Flab50vZX8f<{AQps~<6Xgo9lng~sT 
zCPP!8sn9fNIy3{C3C)6LLvx_H&^%~9v;bNNErJ$9OQ5CDGH5xp0$K^Jf>uLoptaCC zXg#z6+6ZleHbYyWtJLC`tGe9T;6cKWSB0-TMCnyRO z6^aHqLoQHsC~)Q5Aufspg<@d6dy_eC4>?| ziJ>G=QYaae97+MDgi=ANp)^ns6bywxX`xUk9h4r*0A++SL7AZ}P*x}#lpV?e<%DuU zxuHBzUML@wA1VM9gbG20p(0RGs2EfnDgl**N7HS8zhdMwV zp-xa|s0-8;>IQX(dO$s)UQlnS57Zax2la;rKm(yc&|qi?G!z;J4TnZRBcV~yXlM*H z78(bQhbBN1p-IqWXbLnHng&gWW zE1^};YG@6#7Fq|bhc-YPp-s?cXbZFz+6HZhc0fC!UC?f5540EB2knOrKnI~i&|&BZ zbQC%U9fwXpC!tf&Y3K}e7CHx=hb}-Dp-a$Z=n8Zdx&~c`Za_DoThMLj4s;i~2i=Dr zKo6lu&|~Nc^b~pqJ%?UEFQHe^Yv>L17J3K0hdw|bp-<3f=nM1}`UZW696T`pgCal? zAx9_@6d7`YqCiohXpl4H0!4>nK(0_s$PIFbVnMN?IFJYA33)-@P+Z6d@`d~$e<%P7 zgyKQ*p#)GuC=rwxN&+Q?l0nI#6i`Yi6_grE0|i0BPzaP33Wd@^>7fizMko`M8Oj1> zg|b1}p&U?7C>N9)$^+$v@Ou9P22ew&5!4uJ0yTx2LCv8SP)n#46b7}1 z+CXigc2Ik$1Jn`f1a*eGKwY73PtFX>9lw9le`f^#`eu{9cue|1 z{h?S_7?x)Vz3?S=M1`=JBSLFf>47&-zSg^oeTp%c(a=oEAsIs=`B&Ozs)3(!UA z5_B250$qi!LD!)h&`szTbQ`(@-G%N!_n`;SL+BCo76g=oR!DdIP zrH0Z#K~OLh0;PpQp>$AsCPGvOrm(Y*2P62b2@a1?7hFKzX5jP=2TYR1hiz z6^4pHMWJF)ai|1T5-J6ihRQ%?p>j}pr~*_GssvSrsz6ntYEX5k22>NO1=WV?Ky{&d zP<^NY)DUU}HHMl%O`&E`^FI|56q+%t1=JF11%*Map*B!ks2$WE>Hu|wIzgSGE>Ksf z8`K@@0riA>LA{|qP+zDY)E^oE4Gh;m-r@ao><3}#U}y+56dDE%hekjnp;6FiXbdzK z8V8MsCO{LRNzi0y3N#g(22F=%Kr^9P&}?W9G#8o&&4(613!z2OVrU7p6j}x?hgLu< zp;gdoXbrR$S_iF%Hb5JpP0(g&3$zv525pCSKs%vb&~9iCv=`b3?S~FP2cbjIVdw~S z6gmbShfY8zp;ORl=nQlgItQJHE0r-FjNF83KfHjLnWY+P${T1 zR0b*wm4nJd6`+bxC8#n~1*!^FgQ`O{pqfxEs5VpwsteVF>O&2nhEOA@G1LTV3N?e8 zLoJ||P%9`5Y7Mo4+CuH1_D~0?Bh(4%40VCJLfxS5P!Fgl)C=kj^?~|A{h8$UG{ng`8?7C;N3MbKhs3A7Yi z1}%qHKr5kD&}wK6v=&+it%o*18=+0mW@rnv71{=Ehju_apx=nixj zx(D5d9zYMFN6=&F3G@_t20e#fKrf+J&}--o^cH#ty@x(PAE8gsXXp#`75WB!ha9|d z#T1GFMT8uoNKjukP8$YiUGMoF(Eg|9f}3ThT=dTkSF8?c|&m_AIKN- zgZ!ZYC=iMV#fK6=386$#Vkile6iNmqhf+W(p;S<6C=C<@1w$cFS|}7s2c?HHKpCM- zP-Z9#loiSbWruP=IiXxoZYU3w7s?0ahYCOip+Zn$s0dUPDh3sYN z2bG5^Koy}%P-UnJR28ZQRflRoHKAHiZKw`Z7pe!H>9zxlzLGz&n zf9mIXfrZc_Xfd<|S_&xoA=mK;Rx&&Q@u0U6zYtVJ*26Pj;1>J`3KzE^g(0%9u^bmRkJ%*k@PoZbf 
zbLa*15_$!_hTcGLp?A=G=mYc-`UHK3zCd50Z_szhAui_OPy{F<lo84VWrngqS)puDb|?pw z6UqhUhVnppp?pw&r~p(DDg+gViaJJTo z210|N!O#$BC^QTj4vm0DLZhJ3&=_beG!7aMO@Jmslc34a6lf|m4Vn(kfM!CopxMwI zXf8Alnh!027D9`l#n2LHDYOh)4y}MzLaU(F&>Cnhv<_MiZGbjHo1o3m7HBK94cZRv zfObN=pxw|OXfL!6+7BIo4nl{Z!_X1vD0B=u4xNBbLZ_h9&>83~bPhTXU4Sk^m!Qkg z73eB-4Z059fNny!pxe+L=q_{*x(_{o9zu_x$Iuh#DfA3_4!wY0La(6L&>QG2^bUFt zeSkhfpPkB@`AjfxR4Lz3;99*PyiGN#e?EQ37~{fA}BGG1WF1egOWojpp;N5C^eJ@ z3W9>65GXAa3Z;Y6Lm8lqP$nodlm*HPWrMOqIiQ?SE+{vY2g(cOgYrWKpn^~#s4!Fn zDhd^YibEx!l29qAG*kvE3zdV)LlvNkP$j4`R0XOERfDQSHK3YMEvPnB2dWFzgX%*K zpoUN*s4>(8Y6>-jnnNw1mQX7w3~CLvf!adtp!QG)s3X(~>I`*(xCg;lCNvA04b6e(Li3>c&;n>7v(CA8CUgtB4c&q6LieEi&;#fp^ay$kJ%OG=&!Fef3+N^E z3VIE_f!;#zp!d)R=p*zA`V4)6zCz!i?~sEp=6_HGC?eztMS>zjPEZsmDijTJhFqZN zPz=ZwiV3+v?occ!HWUZ)fIJ~D$Qz0a`9QvqALI`OK!H#^C_a<`N(d!_5<^L#q);*_ zIg|oQ38jKkLusHOC>RQX(n6t7Iw(Dq0m=wvf-*x{psY|fC_9t`$_eFyazlBbyih(U zKU4rJ2o-_~Lq(vXP%)@DR01jqm4Zq`WuUT9IjB5T0jda9f+|B*psG+cs5(>wstMJC zYD0CPx==l+KGXne2sMHlLrtKjP&24G)BY(a;!ZEHn-p4^4n3 zLX)7$&=hDYG!2>#&46Y?v!L0~9B3{y51J1xfEGfFpvBMLZ6_|&==?{^bPtBIrw4z2StD)LXJ=*C^F;( zMS-G1(I98Y1&R*EfLx)NkQ?L<#e!l(aUc)K6Y_$*p}3F_{iGR<3{q>2IT-f&9P#!2Rln=@e6@Ustg`mPv5vV9s3@Q$lfJ#E8pwdtos4P?t zDi2kFDngZ@%1{-kDpU=s4%L8aLbagUP#vf)R1c~THGmpIjiAO*6R0WF3~COwfLcPW zpfIR4)COt`wS(G29iWa-C#W;j1?mcQgStaKpq@}Ks5jIH>I?OQ`a=Vtf#Le~6BL86 zbTBjo8VU`AhC?HukZn&?V?HbOpK! zU4yPeH=vu)E$B9M2f7R0gYH8Qpoh>S=rQyJdI~**odkRucciVQhHQJ|<$G{_lpfuch(AXg|RKNJ83Lh+#ZPy#3+ln6=;C4rJc$)My=3MeI%3Q7&7fr6l5 zC}-`Jntz0jMBU2r3K}fr>)K zpyE&os3cShDh-u^%0lI!@=yh+B2)>g3{`=uLe-$^Pz|UiR12yN)q(0l^`QDt1E?X? 
z2x<&9fto_ipyp5us3p`23WHihZJ@SLJE%R>0qO{Kf;vN8psr9is5{gH>IwCNdP9An zzED4?KQsUu2n~V;LqnjU&@gEDpZfX9+Y!)6XcROW8Uu}m#zEu%)X&f1PJkvtlc34a z6lf|m4Vn(kfM!CopxMwIXf8Alnh!027D9`l#n2LHDYOh)4y}MzLaU(F&>Cnhv<_Mi zZGbjHo1o3m7HBK94cZRvfObN=pxw|OXfL!6+7BIo4nl{Z!_X1vD0B=u4xNBbLZ_h9 z&>83~bPhTXU4Sk^m!Qkg73eB-4Z059fNny!pxe+L=q_{*x(_{o9zu_x$Iuh#DfA3_ z4!wY0La(6L&>QG2^bUFteSkhfpPkB@`AjfxR4Lz3;99*PyiGN#e?EQ37~{fA}BGG z1WF1egOWojpp;N5C^eJ@3W9>65GXAa3Z;Y6Lm8lqP$nodlm*HPWrMOqIiQ?SE+{vY z2g(cOgYrWKpn^~#s4!FnDhd^YibEx!l29qAG*kvE3zdV)LlvNkP$j4`R0XOERfDQS zHK3YMEvPnB2dWFzgX%*KpoUN*s4>(8Y6>-jnnNw1mQX7w3~CLvf!adtp!QG)s3X(~ z>I`*(xCg;lCNvA04b6e(Li3>c&;n>7v(CA8CUgtB4c&q6LieEi&;#fp z^ay$kJ%OG=&!Fef3+N^E3VIE_f!;#zp!d)R=p*zA`V4)6zCz!i?~p?v=6_HGC?ezt zMS>zjPEZsmDijTJhFqZNPz=ZwiV3+v?occ!HWUZ)fIJ~D$Qz0a`9QvqALI`OK!H#^ zC_a<`N(d!_5<^L#q);*_Ig|oQ38jKkLusHOC>RQX(n6t7Iw(Dq0m=wvf-*x{psY|f zC_9t`$_eFyazlBbyih(UKU4rJ2o-_~Lq(vXP%)@DR01jqm4Zq`WuUT9IjB5T0jda9 zf+|B*psG+cs5(>wstMJCYD0CPx==l+KGXne2sMHlLrtKjP&24G)BY(a;!ZEHn-p4^4n3LX)7$&=hDYG!2>#&46Y?v!L0~9B3{y51J1xfEGfFpvBM< zXeqP|S`Mv%RzjLZ6_|&==?{^bPtB zImE;K4~hUqgdCwrP-MsniULK2qCw7(3ltrS0l7jkAvee!iUq}n;y@mdC*%crLvbM= z$QSa1{Gk9S5Q+!IhY~;up+rz(C<&AlN(Lo|Qa~x8R8VRt4HN_gLm^OFC=^NurH3*= z8KF#2W+)4k70L!>hjKtUpH>9zxlzLGz&n&_ZYtv=~|fErpgr%b^v}N@x|d8d?Lbh1Nmqp$*VRXcM#<+5&Bbwn5vW z9nel_7qlDN1MP+OLHnTt&_U=BbQn4U9fgiT$DtF@N$3=G8ae}=h0a0ep$pJO=n`}p zx&mE=u0hwK8_-SY7IYiB1Kox0LHD5t&_n1E^cZ>qJ%ye@&!HF4OXwB!8hQi0h2BB$ zp%2hU=o9oA`T~80zCqt1hxnKQLJ^>dkRucciVQhHQJ|<$G{_lpfuch(AXg|RKNJ83Lh+#ZPy#3+ln6=;C4rJc$)My=3MeI%3Q7&7 zfr6l5C}-`Jntz0jMBU2r3K} zfr>)KpyE&os3cShDh-u^%0lI!@=yh+B2)>g3{`=uLe-$^Pz|UiR12yN)q(0l^`QDt z1E?X?2x<&9fto_ipyp5us3p`23WHihZJ@SLJE%R>0qO{Kf;vN8psr9is5{gH>IwCN zdP9AnzED4?KQsUu2n~V;LqnjU&@gB?Gy)n4je(2pxhBLr0*a&@t#ZbOJgFoq|q7XP~pt zIp{oe0lElXf-XZ>psUa|=sI)*x(VHaZbNsVyU;!8KJ);32t9%xLrc^a6Sb zy@FmtZ=ko(JLo<10s07if<8lEps&z3=sV<)0P{a60u&K)gd#zaAtxvb6cvgFIYTZ` zbSMVo3dMxnAa^Jh6dQ^Ic|e|!7vv4ag?u1i$Pe;|0-!)B9uyx+040PHL5ZOxP*Nxv 
zlpIO{rG!#Jsi8Db5EKlBKxv^+C>@j@$^d1AGC`T4EKpV`8 zlpiVp6@&^wg`px)QK%SH94Y~ogi1lBp)yces2o%tssL4lDnXT@Do|CZ8dM#s0o8LjiDw`Q>Yo#9BKizgjzviP;00S)D~(7wTC)D9idK8XQ&I* z73v0chk8IgpIe0Q20#O$LC|1m2s9KL1`UTsKqH}1&}e83G!_~MjfW;c z6QN1aWM~RB6`BT3{~;#_2Zzw$OrdFmLc?a*Fw=%vHq5qRjtz5dm}kR$8y48G(1t}e zEVf~Z4NGlUX2WtDR@ku8hE+DKwqcD8Yi(F(!+IMw*s#%tO*U+{VT%o0ZP;eRb{lrs zu+xTJHte=xj}3cm*k{9j8xGiT(1t@c9Jb+z4M%M_X2WqCPS|kLhEq12w&9EoXKgrV z!+9Gn*l^K?OEz4#;ff7cZMbH`bsKKjaMOlcHr%%1jtzHhxM#zC8y?v3(1u4gJhtJ9 z4Nq-&X2WwEUfA%`hF3Pcw&9HpZ*6#I!+RS(*znPYPd0qE;foDlZTM!xcN-iMnjOv` zHbk%?q79BVM6w~W4Nf*hu_3Ar(QI(G!NrE?HpH;O)rOchxY^)tLo6F&+YrYF4;wsf z@Up?%hPXEP*x+k}pAG&t1lSO0Lp&Se+mOJ9gf=9yA+ZffY)EQDG8>ZHkiv$PHl(s4 zwGC-(2(lsAh7cRl+7N0(Ivdj4kimwGHe|9Pvkh5n$ZA728?xJw!-kwT93+fH^f%-!Ip#IPRXdpBQ8Vn7A zhC;)j;m`0p!?7R=ppn7dJH{*op@3U!0J zLp`9LP%o%A)CcMd^@I9D1E7J>AZRc&1R4qrgN8#RppnohXf!ki8Viks#zPaJiO?iy zGBgF63QdEiLo=Y6&@5;+GzXds&4cDc3!sJ2B4{zR1X>C$gO)=ppq0=nXf?D3S_`d% z)GgN{Qdpp(!k z=rnW&It!hH&O;ZVi_j(LGIRyH3SEP)LpPwC&@JdTbO*W%-GlB!51@z8Bj_>o1bPZR zgPub#pqJ1q=r!~PdJDaS-a{XtkI*ORGxP=e3VnmVLk@}mF$4U0g!Bg#0g4DYLXn`z zkP{RIiV8)8oFNw|IurwPg@j@$^d1AGC`T4EKpV` z8lpiVp6@&^wg`px)QK%SH94Y~ogi1lBp)yces2o%tssL4l zDnXT@Do|CZ8dM#s0o8O8%EnO#)h#rjI&|94HImbXu~8M zCfhK@hN(78vthaoGi;b?!z>$S+c3w5xi-wRVZIFuY*=W+A{!Rlu*8O?HY~GYxeY69 zSZTwmAHgyFJsUrdRaZl6ptaCCXg#z6+6ZleHbYyWt;3UfS@=hSxT{vEi)^?`(K)!v`Ba+VIJS&o+Fq;j0bbZ1`@2LlQG$f7lSghKM#e z+7QWx$Tm3H5XFY5Hbk?**#;LIqT3L|23H$m+Tdn``;YMBx$tvV9}9{N#eqB^Psj`M zhT=j#kT2v1`9lFvAQTUZ4<&#SLW!WnP!cF9lnhD^rGQdGsi4$Q8Yl<~hC-mUP$-lR zN)Kg#GD4Z4%up66E0hh&4&{JyLb;&aP#!2Rln=@e6@Ustg`mPv5vV9s3@Q$lfJ#E8 zpwdtos4P?tDi2kFDngZ@%1{-kDpU=s4%L8aLbagUP#vf)R1c~THGmpIjiAO*6R0WF z3~COwfLcPWpfIR4)COt`wS(G29iWa-C#W;j1?mcQgStaKpq@}Ks5jIH>I?OQ`a=Vt zfzTjmFf;@j3JrsXLnEM(&?sm$GzJpqbDtXf`wl znhVW?=0gjhh0r2sF|-6)3N3?{Lo1+_&?;y(v<6xWt%KG>8=#HQCTKIX1=Unvu|kL^dS0A&Cu1 zZAfNAavM_EkkW=!Hl(&8jSWFI1ltf|Ls}a`ZAfQBdK)s>kkN)rHe|LTiw#+A$Yw)! 
z8* zZ763$c^fL&P|=1;HdMBuiVan5sAfZT8*124(}r3$)V86H4Rvj(XG47(8raa#hDJ6t zwxNj)O>JmqLvtHi*wE63RyKs$(AtJJHng>&oek}6=wL%f8#>w0*@iAQbhV+I4c%?% zVM9+FdfCw1hCVj*wV|I4{cRXv!$2Db*)Z6KAvO%PVVDiWZ5Uz0NE=4kFxrMOHjK4l zoDJh`m|(+18z$K>*@h`LOtoQ}4byFyVZ%%tX4x>?hB-FOwPBtO^KDpQ!$KPt*|6A# zB{nRzVVMofZCGK$N*h+$u-b+-HmtQ_oek@4*kHp(8#dXn*@i7PY_(yV4cl$lVZ%-v zcGKo~BQ_kh;g}7_Z8%}WNgGbtaN345Hk`HLoDJt~ zxM0IY8!p*!*@i1NT(#kv4cBeBVZ%)uZrO0#hC4Rgwc(x(_kV={xZ?K!dI&v&9z#!{ zr_eL#IrIX03B7_|LvNtB&^zco^a1(^eS$tiU!bqhH|RU$knGpd{R4^sMT8uoNKjukP8$YiUGMoF(Eg|9f}3ThT=dTkSF8?c|&m_AIKN-gZ!ZYC=mL8Y~541 zU2CIl>#T6bnRCXrZQHhO+qP}nwr$(CZRh-JJ$s#9%h>gAj73P<$u> zln_b;C5DnfNugv=awr9q5=sT7hSET3p>$AsCPGvOrm(Y*2P62b2@a1?7hF zKzX5j|B?Q#&iuGq04fL-f(k=LprTMQs5n#tDhZW>N<(F!vQRmwJX8Uy2vveALsg)v zP&KGJR0FCB)q-k6b)dRXJ*Ym^0BQ&|f*M0jpr%kWs5#UEY6-Q1T0?E1wop5$J=6i} z2z7!wLtUV*P&cSM)C1}X^@4gseW1QjKd3)602&Alf(AoFprOz(XgD+i8VQa1Z|Rfa z(E-DlfMIOFFfL#iA23V^7$yb`lLCgx0mGDlVQRoIEnt`)Fw6)TW(Ewi0*2WE!<>L& zZon`vV3;2;EC?7D1`LY=hQ$HHl7L}pz_2V}SROE}2pCod466c$)d9nrfMIRGu(CA8CUgtB4c&q6LieEi&;#fp z^ay$kJ%OG=&!Fef3+N^E3VIE_f!;#zp!d)R=p*zA`V4)6zCz!i@6ZqEC-e*Y4gG-v zCC6zK3Je8-f=1>c$CDaOP z4Yh&VLhYdTPzR_Z)CuYgb%DA<-JtGJ52z>93+fH^f%-!Ip#IPRXdpBQ8Vn7AhC;)j z;m`0p!?7R=ppn7dJH{*o!Jyz!2`)FUCzK1y4dsFILiwQlPywhQR0t{z6@iLE#h~I) z38*Ag3MvhifyzSVpz=@!s3KGesti?uszTME>QD`+CR7Wm4b_3_LiM2fPy?tT)Cg(} zHG!H!&7kH`3#cX33Th3tf!adtp!QG)s3X(~>I`*(xCg;lCNvA0 z4b6e(Li3>c&;n>7v(CA8CUgtB4c&q6LieEi&;#fp^ay$kJ%OG=&!Fef3+N^E3VIE_f!;#z zp!d)R=p*zA`V4)6zCz!i@6ZqEC-e*Y4gG-vrNsFU3Je8-f=1>c$CDaOP4Yh&VLhYdTPzR_Z)CuYgb%DA<-JtGJ z52z>93+fH^f%-!Ip#IPRXdpBQ8Vn7AhC;)j;m`=U%4<4ATOJ=>fxxfMI69Fe_k~9WcxZ80H2H^8$wX0mFiTVPU|qC}3C|Ff0ie zmIe&V0*2)Q!-{}mWx%j1U|1b6tO*#_1`O*0hV=o%hJayXz_2M`*c>oy2^h8p4BG;R z?E%A%fMI9Auq$BL9Wd+(81@DX`vQjj0mFfS;b6dUC}21oFdPXOjs^_J0*2!O!-;_5 zWWaDLU^pExoCz4t1`Ou{hVuc#g@EB=z;G#GxEwHC2^g*h4A%mN>jA@!fZ=Aqa4TTA z9WdMp814oP_X39d0mFlU;bFk=C}4OTFg*G1@PEHu2|a_JLocA0&@1RQ^agqhy@TFE zAE1xWC+IWu1^NnogT6yQpr6n$=r{BS3Y1F!bpHzq3@j@$^d1AGC`T4EKpV`8 
zlpiVp6@&^wg`px)QK%SH94Y~ogi1lBp)yces2o%tssL4lDnXT@Do|CZ8dM#s0o8LjiDw`Q>Yo#9BKizgjzwZp*B!ks2$WE>Hu|wIzgSGE>Ksf z8`K@@0riA>LA{|qP+zDY)E^oE4TJ_kgP|eNP-qx592x!SE9fS@+hoK|TQRo!7!(`|0sRGqghD}~p)gQbC>#_XiU9o${R90AMT8|6aN&qE<5lpiVp6@&^wg`px)QK%SH94Y~ogi1lBp)yce zs2o%tssL4lDnXT@Do|CZ8dM#s0o8LjiDw`Q>Yo#9BKiz zgjzwZp*B!ks2$WE>Hu|wIzgSGE>Ksf8`K@@0riA>LA{|qP+zDY)E^oE4TJ_kgP|eN zP-qx592x!SE9fS@+hoK|TQRo!7!(`|0sRGqghD}~p)gQbC>#_XiU9o$ z{R90AMT8|6aN&qE<5pb%Z)WouMvJSEw7*9qIw~gnB`}p*~Pw zs2|iH8UPK120??NA<$507&IIj0gZ%4L8GBD&{$|3G#;7&O@t;vlc6clRA?GB9hw2n zgl0jrp*hf8XdW~lS^zDC7D0=lCD2l68MGW)0j-2qL93xP&{}96v>w_3ZG<*Ko1rbx zR%jcv9ohlygmyu@p*_%EXdkp6IshGn4nc>ZBhXRk7<3#u0iA?SL8qZJ&{^mlbRN0@ zU4$+{m!T`rRp=UY9l8PCgl<8%p*zrB=pJ+*dH_9y9zl5eB0-U%C{R==8WbIh0mX!3L9wAYP+TY;6dy_eC4>?| ziJ>G=QYaae97+MDgi=ANp)^ohC>@j@$^d1AGC`T4EKpV`8 zlpiVp6@&^wg`px)QK%SH94Y~ogi1lBp)yces2o%tssL4lDnXT@Do|CZ8dM#s0o8LjiDw`Q>Yo#9BKizgjzwZp*B!ks2$WE>Hu|wIzgSGE>Ksf z8`K@@0riA>LA{|qP+zDY)E^oE4TJ_kgP|eNP-qx592x!SE9fS@+hoK|TQRo!7!(`|0sRGqghD}~p)gQbC>#_XiU9rnAL%#N{=wCMp@>i}-`Jntz0jMBU2r3K}fr>)KpyE&os3cShDh-u^ z%0lI!@=yh+B2)>g3{`=uLe-$^Pz|UiR12yN)q(0l^`QDt1E?X?2x<&9fto_ipyp5u zs3p`2Y7Mo4+CuH1_D~0?Bh(4%40VCJLfxS5P!Fgl)C=kj^?~|A{h&p@3U!0JLp`9L zP%o%A)CcMd^@I9D1E7J>AZRc&1R4qrgN8#RppnohXf!ki8Viks#zPaJiO?iyGBgF6 z3QdEiLo=Y6&@5;+GzXds&4cDc3!sJ2B4{zR1X>C$gO)=ppq0=nXf?D3S_`d%)GgN{Qdpp(!k=rnW& zIt!hH&O;ZVi_j(LGIRyH3SEP)LpPwC&@JdTbO*W%-GlB!51@z8Bj_>o1bPZRgPub# zpqJ1q=r!~PdJDaS-a{XtkI*ORGxP=e3VnmVLqDLO&@bpW^al!*0p~v`Fcbs|3I&6L zLm{BQppZ}~C^QrX3JZmU!b1_DzoCDif1!v_Bq%Zz1&RtqgQ7z*pqNlBC^i%aiVMYq z;zJ3bgis0r-FjNF83KfHjLnWY+P${T1R0b*wm4nJd6`+bxC8#n~1*!^F zgQ`O{pqfxEs5VpwsteVF>O&2nhEOA@G1LTV3N?e8LoJ||P%Ef4)COt`wS(G29iWa- zC#W;j1?mcQgStaKpq@}Ks5jIH>I?OQ`a=VtfzTjmFf;@j3JrsXLnEM(&?sm$GzJpqbDtXf`wlnhVW?=0gjhh0r2sF|-6)3N3?{Lo1+_ z&?;y(v<6xWt%KG>8=#HQCTKIX1=p@3U!0JLp`9LP%o%A)CcMd^@I9D1E7J>AZRc&1R4qr zgN8#RppnohXf!ki8Viks#zPaJiO?iyGBgF63QdEiLo=Y6&@5;+GzXds&4cDc3!sJ2 zB4{zR1X>C$gO)=ppq0=nXf?D3S_`d%)GgN{Qdpp(!k=rnW&It!hH&O;ZVi_j(LGIRyH3SEP)LpPwC 
z&@JdTbO*W%-GlB!51@z8Bj_>o1bPZRgPub#pqJ1q=r!~PdJDaS-a{XtkI*ORGxP=e z3Vr*J{?9ko{^#?V-*NQ^^b`68{f7QPfimF)2nB|MKtZ8kP;e*&^cNHo3I&CR!a!l6 za8P(C0`xcZ5A-h-5sCywhN3`Gp=eNaCrH0Z#X`ysbdME>w5y}K*hO$6ep=?lgCOgg& zdQg3+0n`v`1T}`5Kuw`$P;;mS)Dmh1wT9Y2ZJ~Bhd#D4{5$XhWhPpsqp>9xjs0Y*& z>ILi7UuD{A)7F@_*0gn|tv79hX&X)3WZGuawwSinv~8wsH*JS$J5AeV+HTYKn6}rn zeWvX@TVbhM7cGR?ErX4r!glQ*DJ7wBw)6ST7*0gh`oj2`*X%|hqWZGrZ zu9$Y!v}>kaH|>ULH%+@`+HKSBn0D8+d#2qt?SW|zO?zb8W7D3P_SCdzrad?9g=sHM zdu7^d)83f&*0gt~y*KTHX&+7dWZGxbzL@sav~Q+;H|>XMKTZ2(+HceTm=-AW|NHOJ zf0-88v>>JhH7%HF!A%Qc+Fz!HG%b{Ap-l^8T3FMqMJf`I}EuU%m zO)FqpLDLGER@k&6rWG}1CUL(>|W*4VTrrZqLKnQ6^UYhhYT(^{F< z+O#&NwKc7sY3)txU|L7hI+@nlv@WJ~HLaUz-A(IZT2Iq@nbzC1KBo0Gt)FTAO&eg^ zK+^`9HrTWwrVTZ1m}$dJ8)4c=(?*##+O#pIjWunYY2!_sVA@2}CYd(bv?-=dHEo({ z(@mRU+Dy}CnKs+BIi}4uZJufKOHEo+|+fCbH+D_AUnYP=sJ*Mq7ZJ%lTO*>%PLDLSI zcG$EdrX4lym}$pNJ7L;M(@vRo+O#vKoi**8Y3EJ5VA@5~E}3@Ov@51vHSL;d*G;=& z+D+4LnReT>JEq+=?Vf4(O?zP4L(?9a_Sm#1rad+7nQ6~Wdtus3(_WeO+O#*Oy*2Hf zY41(@VA@C1KAHB}v@fQ8HSL>e-%a~r+E3GdnfBYXKc)rBVxIq*7TB~PrUf-Em}$XH z3t`${riC;ulxd+&3u9VX)54h+-n0m&{cYMmru}PLMAIUf7TL5YrbRU^nrYEZi(y(! 
z(_)zx+q5{Q#WgLSY4J@nLG^V9BEuCrU zP0L_fM$Tt(|G@P3vG< zN7Fi)*4eZ!rgb%~n`zxm>tR|?(|VcK+q6EW^);=ZY5h$bVA?>_2AMY4v>~PqHEoz_ z!%Z7u+DOwznKs(AF{X_*ZJcT2O`BlaMAIgjHrcc(rcE_%nrYKbn_=2a(`K1A+q5~R z%{6VFY4c56VA?{{7MZr#v?Zo3HEo$`%S~Hh+Dg+_nYP-rHKwgKZJlZBP1|7FM$scG7dnrYWfyJ6Z*({7n|+q65T-8Jo= zY4=TgVA?~|9+~#ov?r!LHSL*c&rN$_+Dp@3nfBVWH>SNc?VV}wP5WTlN7Fu;_Sv*A zrhPT-n`z%o`(fHo(|(!u+q6HX1>JhH7%HF!A%Qc+Fz!HG%b{Ap-l^8 zT3FMqMJf`I}EuU%mO)FqpLDLGER@k&6rWG}1CU zL(>|W*4VTrrZqLKnQ6^UYhhYT(^{F<+O#&NwKc7sY3)txU|L7hI+@nlv@WJ~HLaUz z-A(IZT2Iq@nbzC1KBo0Gt)FTAO&eg^K+^`9HrTWwrVTZ1m}$dJ8)4c=(?*##+O#pI zjWunYY2!_sVA@2}CYd(bv?-=dHEo({(@mRU+Dy}CnKs+BIi}4uZJufKOHEo+|+fCbH z+D_AUnYP=sJ*Mq7ZJ%lTO*>%PLDLSIcG$EdrX4lym}$pNJ7L;M(@vRo+O#vKoi**8 zY3EJ5VA@5~E}3@Ov@51vHSL;d*G;=&+D+4LnReT>JEq+=?cRUdf8O^L;4K07%}+is z?V)LpOnYqF6VslW_RO^BroAxjrD?BBdu`eq)83l)&b0TYeK75#X`f8{Y}yyozMA&U zwC|?RnwHMA^rmGnEu(3fOv`Lq7Spntmd&*6 zrsXg#r)jxN%WYa7)AE{@&$Rre6)>%!X@yKHY+4c1ikeo;wBn|fFs-C%rA#YrS{c*I znpV!V@}^ZVt)gj_Osi~K71OGkR?W2PrqwX5rfIcIt8H2x)9RX5&$RlcH88EAX^l*4 zY+4i3nwr+kwC1L@Fs-F&txRieS{u{an%2&=_NH|(t)pq3OzUh~7t^|$*3Go;ru8tb zr)j-R>up*e)B2j$&$Rxg4KQt>X@g7~Y}ydhhMG3awBe?WFm0r1qf8rZ+8EQunl{d~ z@up2MZK7$DOq*=l6w{`fHqEr@rp+*IrfIWGn{C=0)8?8s&$RibEii4NX^Tu-Y}yjj zmYTNAwB@F)Fm0u2t4v#M+8Wc=nzqig^`>nwZKG+MOxtYQ7Spzxw#~HdrtL6or)j%P z+ilt&)ApLS&$Ruf9Wd>nX@^WZY}ygij+%DNwBx3oFzuvir%XF-+8NW%ns&~#^QK)e z?V@RyOuKB_71OSocFnZwrrj{@rfIiKyKUMX)9#vf&$RodJuvN|X^%{MY}ymko|^W| zwCAS1FzuyjuS|Pw+8fi}n)c4L_ojU??W1X*O#5uw7t_9)_RX~Kru{JOr)j@T`)%4E z(*k8T&;LvdY+4Z0f|?f0wBV+NFzqkXLYfxJw9ux7F)gfV;Y^5OKn;j)6$xj&b0KVWiTzHX_-vRY+4r6vYM97wCtwkFfFHPxlGG#S{~E# znwHPB{H7H!t)OXzOe<_!5z~sAR?M{Grj;8snpV%W`ldB7t)Xd+Olxdf6VsZS*37i#rnNAw zrD?59Yi(K^)7qNW&b0QXbug`?X`M{#Y+4u7x|-I_wC<+$Fs-L)y-e$ES|8K;n%2*> z{-zBuZJ=p`OdD+45YvX5Hq5l)rj0Odq-mo}8*SPc)5e-M&b0BSO)zbuX_HKwY}ypl zrkXa*wCSeJFm0x3vrL<9+8op7nl{h0`KB!}ZJ}w4Oj~T)64RENw#>BUrmZk-rD>~7 zTW#7J)7F}{&b0NWZ7^-4X`4*jY}yvnwwkuhwC$$tFm0!4yG+|{+8)#Pnzqlh{iYo- z?VxFgOgn7a5z~&EcFeTnrkybDq-m#2J8jw-)6SZ9&b0HUT`=vUX_rj9Y}ysmu9|ku 
zwCkqbFzu#kw@kZj+8xvGns(2$`=&iG?V)LpOnYqF6VslW_RO^BroAxjrD?BBdu`eq z)83l)&b0TYeK75#X`f8{Y}yyozMA&UwC|?R znwHMA^rmGnEu(3fOv`Lq7Spntmd&*6rsXg#r)jxN%WYa7)AE{@&$Rre6)>%!X@yKH zY+4c1ikeo;wBn|fFs-C%rA#YrS{c*InpV!V@}^ZVt)gj_Osi~K71OGkR?W2PrqwX5 zrfIcIt8H2x)9RX5&$RlcH88EAX^l*4Y+4i3nwr+kwC1L@Fs-F&txRieS{u{an%2&= z_NH|(t)pq3OzUh~7t^|$*3Go;ru8tbr)j-R>up*e)B2j$&$Rxg4KQt>X@g7~Y}ydh zhMG3awBe?WFm0r1qf8rZ+8EQunl{d~@up2MZK7$DOq*=l6w{`fHqEr@rp+*IrfIWG zn{C=0)8?8s&$RibEii4NX^Tu-Y}yjjmYTNAwB@F)Fm0u2t4v#M+8Wc=nzqig^`>nw zZKG+MOxtYQ7Spzxw#~HdrtL6or)j%P+ilt&)ApLS&$Ruf9Wd>nX@^WZY}ygij+%DN zwBx3oFzuvir%XF-+8NW%ns&~#^QK)e?V@RyOuKB_71OSocFnZwrrj{@rfIiKyKUMX z)9#vf&$RodJuvN|X^%{MY}ymko|^W|wCAS1FzuyjuS|Pw+8fi}n)c4L_ojU??W1X* zO#5uw7t_9)_RX~Kru{JOr)j@T`)%4E(*or*&;LvdY+4Z0f|?f0wBV+NFzqkXLYfxJ zw9ux7F)gfV;Y^5OKn;j)6$xj&b0KVWiTzHX_-vR zY+4r6vYM97wCtwkFfFHPxlGG#S{~E#nwHPB{H7H!t)OXzOe<_!5z~sAR?M{Grj;8snpV%W z`ldB7t)Xd+Olxdf6VsZS*37i#rnNAwrD?59Yi(K^)7qNW&b0QXbug`?X`M{#Y+4u7 zx|-I_wC<+$Fs-L)y-e$ES|8K;n%2*>{-zBuZJ=p`OdD+45YvX5Hq5l)rj0Odq-mo} z8*SPc)5e-M&b0BSO)zbuX_HKwY}yplrkXa*wCSeJFm0x3vrL<9+8op7nl{h0`KB!} zZJ}w4Oj~T)64RENw#>BUrmZk-rD>~7TW#7J)7F}{&b0NWZ7^-4X`4*jY}yvnwwkuh zwC$$tFm0!4yG+|{+8)#Pnzqlh{iYo-?VxFgOgn7a5z~&EcI-dx|9wN;ar2WWOgm}X zDbr4ycE+@`rkykGylEFqyJ*@a(=MBK#k8xYT{G>vX*W!}Y1%E*Zku+;w7aI=Gwr@< z4@`S#+9T5*oA$)Cr=~qK?YU_$OnYhCE7M+^_QtfgroA)my=fmz`)JxH(>|N_#k8-c zeKYO5X+KQ+Y1%K-ew+5kv_QGc6F}1fn-;{hpr!>gEx2hRO#92Ukfwz)EwpK2ObcsT zIMc$L7QwW?P5Z~Re@%;MS|rmVn-;~isHR0TExKtjOp9q+EYo6}7RR)>ro}TYzG(?e zOK4gm(-NDO#I&TQB{MC#X(>!gX<91NQk#~>w6vzBGcCPo8BEJ)S|-yno0i42tfpl% zExTzsOv`CnF4J3 zw6dm^Gp)R76-=vWS|!sen^wiNs-{<-5J7Osi>HEz@e7R>!ourqwg8zG)3iYiL>{ z(;A!B#I&ZSH8ZWbX)R1^X<94OTAS9!w6>Rqw6Ug* zGi|(S6HJ?E+9cB^n>NL?sisXcZMtbQOq*%iEYoJ2HpjHNrp+^LzG(|gTWH!M(-xby z#I&WREi-MoX)8=yY1%5&R-3lQw6&(KGi|+T8%*11+9uOBo3_QYt)^`=ZM$hZOxtPN zF4K0Kw#T%+rtLFrzi9_dJ80S=(+-<<#I&QP9W(8?X(voOY1%2%PMdbdw6mt2Gwr-- z7fico+9lI2n|8&ttEOEu?Ye0GX<9DRa+{XN zw7jO}GcCVq1xzbwS|QU4n^wfMqNWuyt+;6=Oe<+xDbq@uR>rilrj;|TylE9ot7uv! 
z(<+-*#k8uXRWq%+X*Eo%X<9AQYMWNaw7RC%Gp)X94NPljS|igMo7Tj%rlvJBt+{C} zOlxUcE7Mw=*2c89rnNJzy=fgx>u6dh(>j~h#k8)bbu+EIX+2EqX<9GSdYjhAw7#bG zGp)aA156ue+91;gn>NI>p{5NpZMbP8OdDz1DAPupHpaBErj0XgylE3mn`qi3(9y7#k8%aZ8L4VX**2YY1%H+cAK`xw7sV7Gi|?V z2TVI?+9A^pn|8#sqoy4*?YLvX*W!}Y1%E*Zku+;w7aI=Gwr@<4@`S#+9T5*oA$)Cr=~qK?YU_$OnYhCE7M+^ z_QtfgroA)my=fmz`)JxH(>|N_#k8-ceKYO5X+KQ+Y1%K-ew+5kv_N^x^FPx9n-;{h zpr!>gEx2hRO#92Ukfwz)EwpK2ObcsTIMc$L7QwW?P5Z~Re@%;MS|rmVn-;~isHR0T zExKtjOp9q+EYo6}7RR)>ro}TYzG(?eOK4gm(-NDO#I&TQB{MC#X(>!gX<91NQk#~> zw6vzBGcCPo8BEJ)S|-yno0i42tfpl%ExTzsOv`CnF4J3w6dm^Gp)R76-=vWS|!sen^wiNs-{<-5J7 zOsi>HEz@e7R>!ourqwg8zG)3iYiL>{(;A!B#I&ZSH8ZWbX)R1^X<94OTAS9!w6>Rqw6Ug*Gi|(S6HJ?E+9cB^n>NL?sisXcZMtbQOq*%i zEYoJ2HpjHNrp+^LzG(|gTWH!M(-xby#I&WREi-MoX)8=yY1%5&R-3lQw6&(KGi|+T z8%*11+9uOBo3_QYt)^`=ZM$hZOxtPNF4K0Kw#T%+rtLFrzi9_dJ80S=(+-<<#I&QP z9W(8?X(voOY1%2%PMdbdw6mt2Gwr--7fico+9lI2n|8&ttEOEu?Ye0GX<9DRa+{XNw7jO}GcCVq1xzbwS|QU4n^wfMqNWuyt+;6= zOe<+xDbq@uR>rilrj;|TylE9ot7uv!(<+-*#k8uXRWq%+X*Eo%X<9AQYMWNaw7RC% zGp)X94NPljS|igMo7Tj%rlvJBt+{C}OlxUcE7Mw=*2c89rnNJzy=fgx>u6dh(>j~h z#k8)bbu+EIX+2EqX<9GSdYjhAw7#bGGp)aA156ue+91;gn>NI>p{5NpZMbP8OdDz1 zDAPupHpaBErj0XgylE3mn`qi3(Th3uEIU&($g`;F|kvfs&mFZ+Y+kFr0> z{w({8?60!F$^I_;hwPuSf64wW`;Y8E`Tp4lX-{?7w7(lpRWT zXxU+8hm{>pc6iwlWdAMuAKCxPjwm~l?8vgC$c`#In(XMZW5|vvJC^L&vg62(D?6U- z__7noPAEH(?8LH@$WAIdne61UQ^-y!JC*FzveU>;D?6R+^s+O^&L}&R?98&W$j&M| zo9yhebI8sqJD2R-vh&E!D?6X;{IUzkE-1T@?835($Sx|onC#-ROUN!MyOiwGvdhRW zE4!TR^0F()t|+^b?8>sM$gV28n(XSbYsjuCyO!+Qvg^pME4!ZT`m!6yZYaBv?8dU2 z$ZjgTne67WTgYxHyOr$LvfIdRE4!WS_Od(3?kKyH?9Q^g$nGk;o9yngd&ur7yO-?V zvir#HE4!cU{;~(i9w>W|?7^~!$Q~+tnC#)QN5~#2dz9?avd73CD|?*m@vDn(XPaXULu@dzS3kvggR2D|?>o`LY+tUMPE!?8UN|$X+UYne64VSIAx| zdzI|fve(F7D|?;n^|Cj}-Y9#M?9H;b$lfY@o9ykfcgWr;dzb9pviHc|D|?^p{jv|p zJ}CQ;?8CB;$UZ9jnC#=SPslzg`;_d{vd_pqEBl=6^Rh3W?8~yR$i6E3n(XVc zZ^*tW`gEBl`8`?4R%ekl8q?8ma7$bKsOne6AXU&wwb`<3k1vfs#lEBl@7 z_p(38{wVvC?9Z~l$o?w(o9yqhf5`qR`?He`QCM9Z7a%*->Oil^soXblEXv$CMpQc5K;k 
zWXF{qPj-CS31lafok(_K*-2z4m7Pp>a@i?lr<9#ac52yaWT%y#PIh|P8DwXaok@0P z*;!;~m7Ps?cG)>(=aijGc5c~uWapKgPj-IU1!NbLT}XCe*+pa*m0e7BaoHtgmy}&f zc4^sVWS5m)PIh_O6=YYGT}gIj*;Qm$m0eACb=fs!*OXmLc5T^pWY?8lPj-FT4P-Zz z-AHz0*-d0OmEBBsbJ;Cqx0KyVc5B&fWVe;wPIi0Q9b|Wu-AQ(5*edkCZ)1_GsB-WRI0SPWE`& z6J$@6JxTUt*;8asl|4=NblEdx&y+n&_H5a6WY3j7PxgG-3uG^py-4% za@i|nuavz?_G;N{WUrOIPWF1)8)R>ky-D_F*;{09mAy^&cG){*@07hu_HNmGWbc)| zPxgM<2V@_VeMt6U*+*m_m3>V1aoHzipOk${_G#H?WS^CNPWE}(7i3?QeM$CZ*;iy= zm3>Y2b=fy$-;{kz_HEgBWZ#v2PxgJ;4`e@-{Yds>*-vCYmHkZibJ;Iszm)w-_G{U1 zWWSaDPWF4*A7p=&{Ymy`**?-IaNA|z6Bg&2>JF@I3vZKn5COf+97_wu^ zjwL&`>^QRH%8n;HzU&0D6Ut5`JF)B}vXjbACOf(86tYvwP9;0F>@>2|%1$Rcz3dFK zGs?~+JG1O8va`z0COf^!pb%FZV{zw83C3(77eyRhscvWv@u>;$}T6nyzC0HE6T1UyRz&mva8CjCcC=q8nSE3t|hy+>^idR%C0B7 zzU&6F8_I4ZyRqyhvYX0oCcC-p7P4E)ZY8_5>^8F7%5EpSz3dLMJId}PyR+;rvb)Oe zCcC@r9^`#l%I+t-zw80B2g)8Kd$8;wvWLnZCVRN-5wb_h9wmFU>@l*( z${r_syzB|GC(52Ad$Q~)vZu>^ZXM%AO~CzU&3E7s_5Fd$H^# zvX{zUCVRQ;6|z^#UL||A>@~92%3ddXz3dILH_F~5d$a5^-vg%HAh?zw86D56V6y`>^aIvX9C>Ci}ST6S7arJ|+9K>@%{@%04IiyzC3IFUr0o z`?BmSvaia%Ci}YV8?tZ8z9sv%>^rjW%DyN2zU&9GAIg3t`?2gNvY*O+Ci}VU7qVZ< zekJ?0>^HLC%6=#Nz3dONKg#|j`?KsXvcJmyCi}bWAF_YS{w4dj>_4&t6_opb*@0yT zksVZaFxkOnhmif3?2xiU$qp?$jO?(o!^sXWJA&-LW&b1lU)d35N0J>`b`;rBWk-`8 zU3LuFF=fY+9b0xB*>PpZlO11n0@(>=Cz735b`sf1WhaxJTy_fCDP^aUomzGp*=c2` zlbv362H6>9XOf*+b{5%LWoMI}U3L!HIc4XPom+Mu*?DE>lbv670oes*7m{6Ab`jY{ zWfzlOTy_cBC1sbAU0QY-*=1#ylU-hR1=$s4SCU;>b`{xGWml73U3LxGHD%Y5U0Ze? 
z*>z>tlU-kS1KAB_HG!zls!xKY}s>U z&y_t-_I%k3WG|GxNcLjcOJpyVy-fCU*(+qPl)Xy!YT0XKua&({_IlYHWN(zcN%m&h zTV!vQy-oIZ**j$Kl)X##ZrOWe@0Gnz_I}w1WFM4$NcLgbM`RzBeN6Uo*(YS5lzmF} zY1wCFpOt-1_IcSCWM7nhN%m#gS7cw6eNFat**9e0lzmI~ZP|BZ-<5q&_I=q8WIvSs zNcLmdPh>xp{Y>_A*)L?jl>JKfYuRsPzm@$?_IueMWPgJNgZ`prj2Pz~d0NH_M2az3Ab}-q&WrvXcm+X+TL&**;JB;kGvct&^FFS(lzh(a; z`(N1+Wk-@7S#}iJQDsMy9bI+|*)e6uk{w%i9NBSY$CDjjb^_T6WhauISauTGNo6OK zom_Sb*(qhGlAT(18rf-Or<0vtb_UrQWoMF|S#}oLS!HLFon3Yg**RtBlAT+29@%+i z=aZdZb^+N1WfziNSauQFMP(P0U0ikv*(GI{l3iMM8QEoJmy=yyc7^}l=`yviD7%vE z%Cf7-t}45l?CP>>$gU~7mh9TH>&UJvyPoX&vKz>5D7%sD# zD0`6X!LoW?Cr95$lfV?m+alL_sHHWd!Ow6vJc2UDEpA?!?KUaJ}Uc| z?BlXe$UZ6il8&&WP2`<(3avM@c#!$_^(xyzB_F|Cara?0;oPlpRTSWZ6+WXF^pOLlD8ab(Ap9ZzM%WT%v!N_J}5X=JCBolbUo*%@SKl$}X-X4zR}XO*2z zc6Ql0WapHfOLlJAd1U96olkat*#%@5lwC-6VcA7w7nNO1c5&GyWS5j(N_J`4Wn`C? zT~2m+*%f40lwC=7W!Y6^SCw5&c6He`WY?5kOLlG9b!69-T~Bs>*$rejl-)>nW7$n) zH{kV`Pt&Jx=y`*%M?>ls!rI zWZ6?>PnA7Q_H@}ZWY3g6OZIHpb7aqzJx}(0*$ZSZl)XsyV%bY%FO|Ja_Hx-PWUrLH zO7?2mYh~lXJnt1eNOgy*%xGAlzmC|W!YC`UzL4L_I24e zWZ#s1OZIKqcVyp{eNXm%*$-qtl>JEdW7$t+Kb8GV_H)@UWWSXCO7?5nZ)Crf{Z96K z*&k$ol>JHeXW3t5f0g}B_IKGoWdD@?OZIQse`E(LBKQBY1IrE~JE-hnvV+SGA^R`c zA!Ub>9a?r6*=?3R%8n&Fw(K~v z=d$7%1$Lawd^#q)5=aKJH6}-vNOugBs;V0 zEV8r8&L%s%>>RRl%FZP_x9mK!^UBUAJHPA#vJ1*CB)hQeBC?ChE+)IU>=Lp|$}T0l zwCpmn%gQb%yS(fQvMb82B)hWgDzdA}t|q&>>>9Fb%C055w(L5x>&mVtyT0rOvKz{7 zB)hTfCbFB#ZYI0A>=v?H%5EjQwd^*s+sbYyyS?lVvOCJ|B)hZhF0#AI?k2mt>>jdv z%I+n*x9mQ$`^xSoyT9xKvIoi@Bzv&zA+m?c9wvLZ>=Cj@${r>09W%AO^Aw(L2w=gOWZd%o-ivKPu;Bzv*!C9;>wUM73F z>=m+C%3dXVwd^&r*UDZed%f%pvNy`!Bzv>$EwZ=D-X?py>>aXq%HAb=x9mN#_sZTU zd%x@hvJc8WB>S-JBeIXmJ|_FP>=Uw2%04CgwCppo&&ob0`@HN6vMS@LE3&W3 zz9##+>>ILg%DyH0w(L8y@5;U>`@ZZ4vLDKRB>S=KC$gW)ekS|5>=&|M%6=vLwd^;t z-^zX``@QTBvOmiHB>S`MFS5VN{wDjo>>sj!%Kjz$x9mT%0~M9~f7yX$2az3Ab}-q& zWrvXcm+X+TL&**;JB;kGvct&^FFS(lzh(a;`(N1+Wk-@7S#}iJQDsMy9bI+|*)e6u zk{w%i9NBSY$CDjjb^_T6WhauISauTGNo6OKom_Sb*(qhGlAT(18rf-Or<0vtb_UrQ zWoMF|S#}oLS!HLFon3Yg**RtBlAT+29@%+i=aZdZb^+N1WfziNSauQFMP(P0U0ikv 
z*(GI{l3iMM8QEoJmy=yyb_LlLWml42S#}lKRb^L`U0rq!*)?U?l3iPN9ocnd*OOge zb_3ZBWjB)DSauWHO=UNe-CTAH*)3(alHFQ%8`*7Tx0Bsob_dxVWp|R@S#}rMU1fKZ z-CcGM**#_VlHFT&AK86n_xsQOKc8^_&;JJZmn#Eg50pJf_F&mVWDk`+O!jcuBV>=1 zJxcay*<)mnl|4@Oc-a$VPn11L_GH;pWKWenP4;xzGi1+{Jxlg%*>hyil|4`PeAx?R zFOsaWFM7%O!jfvCuEb3 zdD$0aUzB}G_GQ^uWM7qiP4;!!H)P+GeM|Oj*>_~$m3>e4ec2CWKa~AQ_G8&kWIvVt zO!jlxFJ!-z{Yv(0*>7aOmHkfkd)Xgkf0X@6_Gj5&WPg?YP4;)$KV<)u{Y&<5*?(jQ zDkdiY*@0yT`TwZ)%OI?)KkV1$mM)Q&Qj`<{L0S<66p-%jMnX~qQBp}s=>{dFR8mQc zR$3aQ8^vy)|2?z!vybn)&oOJ}dV8&N=C|lXbYeOQos>>SC#O@;Dd|*nYWhJs4gC+-IQ)dH>X?BE$LQtYq|~HmTpJ4 zr#sLc=}vTKx(nTvewpq@cc**MJ?U5IUUYA|58apUNB5^+r3cWj(F5s0^y~Cs`VIO` zdI&v~9!3wRN6;hbx9GR&QS@kf3_X?}M~|o9p(oH2=}GitdI~+2ewUs`Pp4QJZ^k(`K`crxfy_McZZ>M+AJL%8p&*?AdFX^x7UG#2x551S(NAIT(&|lNv&iPtYgnQ}k*241Jb9N1vxJ(BIJ)=}Yuw`U-uOzD8fCZ_qdCTl8)E z4t6VZw3 zBy>_b8J(O?L8qis(W&VN=`{31bXqzcou1A>XQVUHndyh=EOb^n8=albK|ex2O6Q~> zqaUYp(Yfh7bY40iou7V!E)^s{tX z`Z>BBU7miPu0U6$E732|FVdCiDs)x)CAu12ovuOGq-)W&={j^>x*lDhZa_Dr8_|vF zCUjG}8Qq+2LARt^(XHt=bX&R|-Jb41cceSfo#`%gSNdhT8{M7mLHDFzp?lH2={|H{ zx*y%2ew7|TzeW$F2hp$7gXuTuH|ZhtPUT(X;6}^jvx#J)d4cFQgaIi|Hlwd-VJCQhFJ^oL)hH zK(C}%(W~h-^ji8u`Xl;ddL6x<-av1pH_@BvPv}qSE%a7;8@-+0LGPqLqd%v=pueQQ zqIc1|={@vbdLO-?K0tp>e?uRn57CF|BlJ=F7=4`nmOeqBq)*YO=`-|M`W$_pzCeFR zU!*V5m+33?Rr(rzoxVZeq;Ju;={xjY`X2o~{R4fU{*nHP{+a%T{+0fX{+<4V{*(TT z{+s@X{+IrbPEeiuKm7omkWNG=rjyV~>11?rIt87QPDQ7tAEeXJ57BAqbaZ+;1D%o1 zL}#WSrnAsl>1=d%ItTp-{V1K2evE#c&PC^@^U!(ed~|;L3AzCNBwdg$L>H!?qKnW) z>8I&pbaA=_U6L+Em!`|m&(P1(W$EYWa&&q6dAb5!k*-9)K)*;=rmN6Z>6hqgbalE0 zU6Zaw*QV>xb?JI^eYyeNkZwdbrkl`B>1K3ux&_^mZbi4I+t6+4c658X1KpACM0cjU z&|T@5>27p)x(D5peueHu_on;Med&I5fBIE=0R0+0kRC+8P7kKvpx>m2&_n5A^l*9v zJ(7Nlew!XekEX}aW9f19c={cB0zHwQL{Fxt&{OGm>1p(IdImj{o<+~5=g@QMdGvgG z0lko3L@%b7(C^Xj(@W`P^m2Ly{Q236OdI!Ce{*3;d{(}CJ{)*m3@220|V9 z`dj)0eUd&!pQg{yXX$hFdHMqV9et6$L|>+_&{yee^mY0MeUrXL-=^=-cj|hy zN;(ytntqT@Lq9~PrPI;r=?rv6Iuo6lewfZeXQi{z+36hgBlM$mPWmzWaXJ^Bo6bY$ zrSsAG=_lv{^pkW!x)5EMeu^$a7p0%3i_yjD5_Czr6kVDwLq9`5OP8geqs!6d>F4PR 
zbVa%n{Q~_WU74;zSEXN~tI^fz8gxy%7G0aJL)WG2(e>#DbVIrk-I#7dH>I1=&FL0& zOS%=^nr=h4rQ6Z%=?-*9x)a@*?m~C{U;lq!xWCLvH@Z9BgYHScLieJ3(|zc^bU(U3 z{VF|xevKYT529bE2h(rRZ_-2Pq4Y3%I6Z2S(_`qd^f-Dv{SG~Wo=8uk zC(~2tsr0+_G4V_vrWOrSvj-IlY4ZfL=+j zqF2*v=(Y5R^hfl^^g4Pyy@B3HZ=yHTpU|JuTj;IyHhMd~gWgGhMt@F!L4QeqMem|_ z(|hQ>^gen&eSrR&{)RqCAEFP_N9d#UG5R?DEq#JMNuQ!m(`V?j^f~%GeS!XtzDQr9 zFVk1(tMoPcI(>t_N#CMx(|72*^ga4}`Um9)Iyya_fzC*0qBGME z(^=@ObT&FWor8Xaew5BhKSn=J=c04ddFZ@!J~}`B1YLlBk}gOWq6^bc(M9N@^wV@P zx;R~eE=iZ7OVefOXXt0?vh;IwIl4UkJY9jVNLQj?pkJgb(^crI^h812CdO5v<{(xRducBAeYv{G~hxAAE$MiaSJ-vb6NN=Jy)1T0v(p%`Q z^fr1sy@TFKe@1^!e?fmqe?{-2chh_5z4ShMKYf7yn*N49NFSmP(?{r|^fCH4{Vjch zK1rXVPt#}Uv-COoJbi)wj=o4=qA$}|=&STK`Z|4szDeJrZ_{_^yYxN!d-@0ZKK z6a6#&3;iqo8~r=|2mL4g7yUQ=5B)FwADy5U_ka2UIw75iPE04ElhVoP~s$L5&BU&C;b@xIGu~mP3NKW()sB8 z^b>Re`boMVU5GABKSdXzi_%Zi#pvR63A!X*iY`r;p`W3jrOVRK(dFp!^z(EDx*}bP zet~|Gu1r^>i>^)Aq3hE1==yX6x*^?&ZcI0!o6^nb=5!0XCEbc{ zO}C-j((UN>bO*X4-HGl@ccHt|FVo%V?sN~jC;bZDi|$SLq5IPP=>GJp^Z@!bdLTWB zew`jnzd^rA521(B!|37k2zn&_7X3CoiXKgmp~uqW=<)PB^aOe$J&B%7PobyM@6yxg z>GTYGCOwOuP0yj{((~x~^a6Szy@*~+FQMO~-=~+-%jo6w3i<i{4G|q4(1J=>7Bo z`fK_d`XGIXK1?5>kJ88J74Xq^y73cIyaq%&P(T`^V3hz1?VT~f^;FeF#Qx=gf2=yO&6n!(3UtbS=6zU5Bnq*Q4vx z4d{k+Bf2r&glm|=$Z5^dNw_Wo=eZ8=hF-5h4dnNF};L-kA9zC zN-v|A(<|r?=#}&;dNsXR(h2BbUqqoyL=$-Uu z^yl;!^q2Hk^e%cgy@%dQ@1ytA2k5WqZ|H;cA^I?Vgg#0iqmR?y(kJMX^eOr@eTF_u zpQF#y7wGTki}WS>GJS=Lgw^ey@}eTTkF-=n{$f1vNvKhi(ZKhwX^ztX?a zztexvf6{-^f7Aca|I+`_3F>hFryrmb(uwHAbP_r#os3RSr=U~Psp!=7gLE4DAv!Ic zj!sW!pfl2$=*;xPbQU@*osG^;=b#^8J{WM*SE>4%AOVXw2(sUX68TwheEd3l^jxJ9>PgkHT(v|2J=ojhAbQQWP z{SsY`u1?pWYtpso+H@Vd?*ID#`@+2*C-vzDbVIrk-I#7dH>I1=&FL0&OS%=^nr=h4 zrQ6Z%=?-*9x)a@*?m~B^U#7d!-RT~5Px=+Q7u}ogL-(co(f#RH=>ha>^gwzL{W?9E zeuI9K9zqYLhtb3TZzoESAVJZhMT+zp5i~OBt)RDqMg@%y8WS`&Xk5_vpm%~M1WgQ@ z6f`+#O3>7xcY~${O%Iw8G&5*c(CnZ&L34xV1+84Ax=s?idLEi)&3_28aIOs^w(V$~N$Ai8NIuUd- z=v2_@pff>dgU$t=54sTaUC_m#OF@@|t^{2Tx)yXj=tj`Zpj$z=gYE>~4Z0Wfeb5g< z_k(^6`YGt=pkIQ14f-wU_n<$5{tWsn=|Kt8o&;vmUgAxTL4oVV~ 
zG$>h6@}Lw!DT7i4r4D*9C{55qL1}~11*H$l5R@?}Q&8rhhl8>NWev&}lsza%&?7;Q z2IUNTEa>r|TtT^m@&x4#$`_PB=!u{LK~Dx13@Q{RH4kbL)H0}5Q0t&JL2ZND1+@?A5Y#cKQ&8ui zEh7w|Dacc1_ZqpG%#pT(Ca~igWd>wGiXTA z(4b*K!-GZyjSPA#=n}a?H`ZQ=u(AJ=BLED3N1nms^Ea>x~FM_@d`YLEw(C(l; zL3@Mt1?>+y5cGA>H$ex34h0(dm!MyRehd0N z=#QX3gZ>KoJLsRFe}n!DN>DHCzn}+#5(Xs-N*t6VC}~i#pyWX*f>H*h3Q8UHU{IQ% zhl0`ur3*?Qlp!c%P^O^FK@SII3CbFjEhu|Xj-W?^9u3MF^jOg2LAio*2jvOM8Br5+Cg=K>IT&dsvp!Ks9{i}pvFN> zf|>?33u+$JBB*6htDx3FZGzecwF_z=)FG&2P^X~IL0y8n2E82AEvS1?kD#7GuLSi9 z>K)W4sBciep#DLx1`P;$EoflSprF@-1_!+n^k&eIprJv-f`$i;2pSpmR?yo)qk={U zjR_hXG%jd-&^tjBf+hw{3Yr`=C1`5UyFt@}rU%Ukni(`JXm-$?pt(Wwg60P;2wE7l zC}?relA!m3-Va(Dv@B?O(2AfBf>s8t3R)esCTMNYhe00&eH^qdXnoLzpp8MBf;I1|14I z9CRe;Xwb2s<3ZmBod`M^bSmg{(3zmKLFa5f==q=uK^2241-%gTVo>FvDnV6) zUJ9xfR6VFhP|cuPLA8VG1l0|y7gRr}K~Te>MnR2(nglfsY8KQys6|lApjJVxgW3eO z4QdzEKBz-b$DmF@orAgrbq#tss9R9?pdLXzgI)>h71TSZPf*{WenI_%UJV)$^jgrs zpg}>e2MrE-Bk0YbAwfffh6N1|8WA)y=&hi)gGL374jL0QHfUVX_@H-!CIn3kniMoS zXiCu3pm&3&1x*i{5i~PsR?zIAIYD!S<^|0US`f4_Xi?DOpd~@?1-&1%G-z4S@}Lz# z9|WxoS{1Z9Xid=CpbvvS3i>!`UC{cV4M7`&HU(`C`XuPnpe;dLgSG{2584s5Gw8FR z&x5`Q`ZDONpj|<`gZ2dN4cZs9Kj=Wv*FoO|9Sk}YbU5fp(9xh{LC1r>4LT8YGU!y$ z>7X+~XM@fKoe#PY^j*-!pi4oQgRTT!4Z0R|J?KWz&7fOBw}b8k-3_`I^nK6|LHC1x z4EibP=b&GLehvC9==Y#Mg8mHpE9mc_e}euE`Y$L!gRuXC9tcVplqe{1P?DgeLCJ!W z2c-x~8I&q0b_It#9tnChC}+@P zL5~OJ3d$XnCn#@FzM%X;PXrYRdNQbBP@$m0K~Du02`U=&bWpLN;z1>XN(Pk*Djihj zfBN5hvj6`Zpl8DAvq5Epo(n1$R6gkWpb9}1gDM5R5cFbD<)A7-RfAp%suol|s76rD zpjtt-gX#p;4XPJZKd3=a!=Oe%jf0v5H4SPO)I6v~P|KiJL9K(@1hoxn7t}teLr}+{ zPC=c6x&(C%dO4_DQ1_r7K|OQUL9Yi54tgW# z&7dJcLxY9|4G$U-G&1O|ptplY1&t0G6ErqxT+sNScY-DaO$?e8G&yKW(A1!JgQf*d z51J7)GiX-O?4UV8bA#pu%@0}-v@mE<(BhyaLGJ~RM6?5GeKvA&IO$h zx)AhT(8ZujL6?KB1YHff7IZ!6M$pZmTS2#j?gZTpx)=0)&<{cPgMJM9Dd^{*UxI!O z`Yq`9pg)5C{J%>0zZdDHikIl~m-t)!BmNcti3AP#wf`d?5D7&hkys=VNkuY|T%-^w zMJkb6JSftLheTSDPNWwZL`IQGWEKyLEF!DOCbEkh;t}zv$SEEZkBeL)x5y*%ihLr! 
zctR8qPl|$~kSHvk5=BH&@w6x=ii;AWq$njyi!$OF@vJB-o)hIndGWlcAS#MV;sx=d zs4S|8s^TS4O;i^(L`_jk)E0F_T~SZe7Y#&1(MU8FO+-`COf(lQL`%_1v=(hdThUIm z7ac@L(Mfa`T|`&$vgjtdiyoqU~m@DRq z`C@@sC>DvuVu^T9yf2oDWn#HlAwCc*#VWB{tPyL)hvFmgu~;Y8iw$C<*d#WKPsFEU zi`Xi*iS1&C*eN~}pNlWVm*Ok2OY9bV#9pya>=y^b*Ww#-P#hA6#Sw8-923XIx8j62 zDNc#g;*2;e&WZElg7{8c6qm$haYbAe*Ti*kL);X%#BFg$+!go4_u>a}U;HS35KlgKO{7Fk4AkxgV5Im9F4QIS(TCLR~LL~fBsVqT*>$OcWO-L`hLflon;gGvZlMRy-%liSpukQ9)D`mBb6;MNwH)5mm)YqME2K zYKWSmmZ&Z2h`OSls4p6bhN6*ZESiX>qM2weT8NgSm1r&6h_<4gXfHa5j-r$3EV_uU z;$_iIbQe8DPw|TAC3=fKqOa&D`iob^0P&g_CZi(CCj<_rCiSNY^;=cG%{3L!BzldMOZ{m0Hhxk+c zCH@xwh=0X@B0*#Kk9a^N6p2J)kwhdF$wYFILZlR_L~8M%NFyE+X+=7bUStp%MJAD1 zJS?(^tRkDpE^>%R#G@jocuYJla*5m`kH{ zJyBmY5Di5m(O5JQO+_=&T(l4^MJv%-v=MDZJJDWr5FJG)(OGm6UB%0yo9Hfjh@RpV z(M$9eeMDc;PxKeBiUHy^F;EN=uZzLr4e_QJB8G}#Vz?L~MvAw@+hUX$EyjqkVw@N+ z-VqbTL@`NB7E{Dj@vfLAri&S3rkEvWi#cMhm?!3o1!AFCBo>P$;yv-cSSpr@ldtQ8-MkHp7fomek6h>c>C*epH~pNcJFtJo&CiydO8_)L5*z7SuEuf#5~ zTkH{g#XhlL91vfNZ^S`yNE{YN#8Gif92eh;6XK*eB~FVo;;c9)&Wj7;J8@B55|_ml zaaCLs*ToHSQ`{1_#T{{1+!No6AH;p}qxebuEPfHcir>WV;t%ns_)GjP{t^F*|3rc& z?jP}hNGKAC#3G4EDw2uhB85mPQi;^!L6Js0B+`m>BE85UGKx$hvv^o!5m`kxkzM2v zkBCP_PVw0P@V}=x|L@0-k9(F&n{zqKRlKnu+G3g=i^SiPoZxXe-)@_M(I6C_0JGqKoJ%UKZU%chN)i6t9S0 zqPOTH`ig#{zj##)5U+`WVvu-U3>I&QH^mS!R16ct#RxG{yd~Ziqr_-2MvN8X#CY+J zm>?#KNn)~?BBqLW#WXQp%n&ohEHPWm5p%^nF<&eY3&kR_SS%6miTA})u}mx%E5rw4 zrC23ai#1}c_)vT#J{If5da*%l6r03m@rn3UY!O?f&c8T3$ zkJu~riT&b$_*#4;4vItKus9-)ieuuq_*R?{C&ejoTAUGQ#W`_aToB)hi{g^FEUt*F z;+nWFZit)WmbfkMh`ZvR_+I=V?u#GAPvU3si}+RiCVm%xh(E<&;&1Vf_*eWV5;S!I zi3dbNkw_#KNkmeSOe7a6L`soLq!tf~G~ywVR-_Z@MFx>kWD=Rh!y=2wDzb^}B8PZH zJSuXE$He0zm&h&hh`b`7$SE{pQB*uFiizT)geWOWiPEBsct$)c z%8KViIZ<9bFDi(NqLO$)yeKM*Dx#`*NmLWnMGa9?)DpEt9Z^@*6ZJ&{(NHuJjYSjD zR5TOKMGMhVv=XgF8_`y@6YWI@(NS~~okbVXRlF>^iSD9@=qX+iy+m)(NAwl_M1S$B z7$9B~1H~Zmx)?0p5O0bhVyGA?&S#7ePBtQKp;TJfRy zNPH~TiS=TG*eEuM&EgaBsn{a6ifv-M*dca`&&22A3-P7+O6(H5#U8O&>=XON0r9o? 
zMjRA}#9?tn92LjJaq+D7F9%5@sg+}s*4(;rl=)qi#npNs3+=+2BM*8BpQn*qN!*mnu`{qrD!Eu zi#DRIXeZi>4x*#zBsz;OqN{jWbQ9f057AS+B6^A5qL1h+`icJHRWU%kCI*T@;&m}t zydmBcL&Q)qObizz#7Oa$cw3ATqs168R*Vzl#XDkxm?$QR$zqC_D&7^-#B?!3%oMZ4 zY%xd774yV=u|O;oi^O8FM7$^77fZ!5v0SVWABdG=l~^s-h_&KF@sapgtP|_S2C-3W z5}U;*;#09jY!%zYcCkb36rYLD#TVjB@s-#mc8fh?uh=K{iv!|o@r^hr4vE9!h&U>a ziR0p1aYCFFr^IP-Mw}Jr#CdT+d?zl7OX9M)BCd*S;<~sYZi-vtwzwniihJUF@q@T8 zeiT26pT#fYSMi(pUHl>b6n}}o#XsU-@t;W0-2EdS5D7&hkys=VNkuY|T%-^wMJkb6 zJSftLheTSDPNWwZL`IQGWEKyLEF!DOCbEkh;t}zv$SEEZkBeL)x5y*%ihLr!ctR8q zPl|$~kSHvk5=BH&@w6x=ii;AWq$njyi!$OF@vJB-o)hIndGWlcAS#MV;sx=ds4S|8 zs^TS4O;i^(L`_jk)E0F_T~SZe7Y#&1(MU8FO+-`COf(lQL`%_1v=(hdThUIm7ac@L z(Mfa`T|`&$vgjtdiyoqU~m@DRq`C@@s zC>DvuVu^T9yf2oDWn#HlAwCc*#VWB{tPyL)hvFmgu~;Y8iw$C<*d#WKPsFEUi`Xi* ziS1&C*eN~}pNlWVm*Ok2OY9bV#9pya>=y^b*Ww#-P#hA6#Sw8-923XIx8j62DNc#g z;*2;e&WZElg7{8c6qm$haYbAe*Ti*kL);X%#BFg$+!go4_u>a}U;HS35Dv1}wi=wiqBC3j)L^V-e z)DSgAEm2$45p_j9QC~C=4Mii-STqq$MKjS{v=A*tE74lC5p6{~(Oz^A9YrV6S#%Ly z#ml0b=q`GQp5hhJOY|0fL|@TQ^cSy+0pc|=Pz(~Ui^1Xz@unCehKgZgxELWuinqku zVw4yy#)z?EoER_O5fj8jF-c4oQ^Zv9u9zmKiy303m?dV5IbyDuC+3R=Ap#KCxdM5MPUL#6fXL92Q5!QE^Nh7vG8#;-okwPKz_*tT-pm ziwojAaZy|nm&FxvRa_I-#SL*&+!D9N9dTFO6W@y;#C`Fj_(}XMei6Tl-^B0Y5Amn? zOZ+YV5&w$+M1qzsAn|}mC=!XpB8f;Ul8NLZg-9t9+6k%6ZypxqJVf(6cmL-VeynGB8rNqMKMua zln^CFDN$OK5zmNcMOpEjC@0E`=S2lkQB)Eyh!;g=QAJc0FNtcRx~L&)idv$!s3Yo% zdZNB)AR3BBqOoWqnu=zkxo9C;idLevXd~K+cA~xLAUcXpqO<5Cx{8-YH_=`65Iw~! 
zqL=6``iQ=wpXe`M6$8X;VxSl#UKfMK8{$ndL<|+f#Beb}j1+H)x5X$iT8t56#W*ou zydx%viDHtNET)L5;$1OKOcyi6OfgH$7IVa0F;C1F3&cXPNGujh#Czg>u~aM*%f$-u zfmkV4iPd6_SSvmhABm5}Ix z+&|(0kx(QOiA55TR3sD0MGBEpq!OvcgCdQ1NTe0%M0$}yWE7c1X7RAdBC?8XBD=^T z9ubdO@xX2}Pi##H)$S3lPCqx19q$nr~iNfM3QA89KPm5xrxF{h?ic+GqC?lQ` z&x*3*IZ;lO7tf0dqN1oIUJx&e%A$&>Dqa%RM0HU^)D*QuZBa+m74<}Y(Lgj5jYMP7 zL^Ku6M03$Xv=psGYtcru741ZO(Lr<+okVBRMRXM}i*BO3=plNFS41z-Tl5iqML*GB zyebBW*Tg_ENW3lvi#NoZVu%=paOesMs2 zExr*4#UXK691%ywF>zdcD^7@$;*>Zo&WN+(oH#Eoi0{NjaY(uwpUgUBc{iOk|*kws(`*+h1cLp&lL6*n{zqKRlKnu+G3g=i^S ziPoZxXe-)@_M(I6C_0JGqKoJ%UKZU%chN)i6t9S0qPOTH`ig#{zj##)5U+`WVvu-U z3>I&QH^mS!R16ct#RxG{yd~Ziqr_-2MvN8X#CY+Jm>?#KNn)~?BBqLW#WXQp%n&oh zEHPWm5p%^nF<&eY3&kR_SS%6miTA})u}mx%E5rw4rC23ai#1}c_)vT#J{If5da*%l z6r03m@rn3UY!O?f&c8T3$kJu~riT&b$_*#4;4vItKus9-) zieuuq_*R?{C&ejoTAUGQ#W`_aToB)hi{g^FEUt*F;+nWFZit)WmbfkMh`ZvR_+I=V z?*9+GWcksvpTy7N7xAn3P5dtY5Pyol#NXl{@vrz#BxvIT5)X)kB9TZel8B@tnMf{D zh?F9gNG%=|X~aV!tw<-*iwq*8$Rsk0heZ~VRb&&{MGoLjCaQ}XqNb=NYKuCeuBa#Kiw2^hXe1hoCZefmCYp;DqNQjh zT8lQKt!O9Oiw>fr=p;IeE~2Y=S#%TKMGw(aydrvu-lC7_EBcB4;#Dy~ye0;ULE?2W zSiB+L6hp*NF-!~>Bg9DYmUvr?5~IZ!F;=d7g&&3zwOYxQ1C3cHFVz1aI_KO4JYw?XZC=Q9k;)pmZ zj)~*qTX9016sN>#aYmdK=frt&L3}4Jic8|MxFW8KYvQ`NA#RFW;2Spn3kVq@iiS#0a$S5+2%;I5@MPwD(M0SxwJR%+yImKh*agj^p7I{Qo zkx%3oPly8INl{P~5{1Q6qKGIeo)*PKaZy5)6s1IIQARu?o)u-qbE2FmFP;|_L`6|a zydYi_l|>a%RlFpsiRz+;s3~fR+MbMb}vQhX(LiQQt4*emvl{o;W5T6`l8ibLYCI3kXUW8%2@ zR-6zg#VK)GoDpZmIdNWG5Z{T5;*z*5u86DRnz$}*h@0Y;xGnC8yW*bsUi={Liyy^L z;%D)T_*MKSeiwg;KgD0-Z}E@#SNtauv~&N62Sh@VNF)|XL{gDVBo`?}N|8#W77vOv z;vtb%q!Z~y29Z%@5}C!rB8$i>vWe^>hj>IhDsqa)#N#5D$Sv}Syds~-FP;zu#FL_+ zC?pDtr$iA^R6H$;iQ=M!C@D&b(xQxbMm#IZiswW*QC>VRDu{}rl6XP9C@PC8qN;dF zR1?)j4N+6n617DgQCHLx^+f~GP&5*aMHA6fG!xB53(-=v60JoW(N?q*?L`OCQFIcW zMHkUkyeztj?xKh2DP9r1L~qeY^cDR?fAOjqAYKy##USyz7%bipZ;Byes2C=OixFa^ zcuTx3Mv2j4j2J7%iSgncF+ofelf+~(MNAd%ifLlHm?370Sz@-BBj$>EV!l`)7K%k; zu~;JB6Yq@uB!gd@R<9^=L`h9a#Epc1i5qHHs@xAy#+!sHJpTy7N7xAn3P5dtY 
z5Pyol#NXl{@vrz#Bxvvc5f6xjB9TZel8B@tnMf{Dh?F9gNG%=|X~aV!tw<-*iwq*8 z$Rsk0heZ~VRb&&{MGoLc8jB{Psc0seix#4#XeC;UHlnR)C)$e+qNC^}I*Tr%t9V&-6Wv7* z(Nnx4dWqhmkLWAhMF+jW~28u!Abun1HA>I^2#85Fz3>PEBNb#0pNY@K7vf9t zmDnYAi#=km*eCXj1LAA(jW{R{iNoTEI4X{btvDN#fe6;F#| zqPQp_N{UjVv?wE<5zmUU;yF=Hlo!v73ZkN@Bwi3Niprvjs489()kJksL(~+tL~T(= z)D`taebGQP6pch<(L^*A%|vt2LbMdEL~GGTv=!|{d(lC36rDt8(M5C>FNrBr#b`5mUvxVw#vPW{8<$mY6N(h`C~(m@gKHg<_FdES8A(#QS2YSSFT> z72*T2QmhiI#Tv0zd?-E=AB%Njz1Sc&icMm(_(Xgvwur4_o7gUPh@Ijy@wxayd?~&X zyTop>N9+~*#C~x=d@a5a2gMJrqJ?NFT8Y-8jc6;{iT0v{=qNgg&Z3LxDqa@dM0e3c^c1g%UZS_?Bl?Pd zqQ7`m3=pr0fntz&T?`g)h&RO$F;ol_!^H?OQoJSJ7Nf*yF-D9P<~M}XX115h4@l@C3cD3VvpD>_KE%C zfcRQ`BMyo~;;=X(j*4UAxcF9_5GTbcaax=aXT>>jUR)60iHqWrxGb)StKyotE^dgM z;+D8A?ufhMp7>t;AnuDF#ZTgA@r(FX{3d=Ee~3TDU*d1^kN8*oClYjW|A+@fLXk)$ z7D+@>kxV2PDMU(>`JFL_8{TipRv` zBA3W5@`$`5pU5ws5Cz1OqM#@w3X7*i5m8h;EsBZaqJ$_ZN{P~&yTu-{SL_q}#R2iP_(mKQhs0rV zL>v{z#BuShI3Z4oQ{uEZBhHF*;=H&Zz7rS4C2?6?5m&`Eab4UHH^nV+Tig+M#Xa%8 z_(9wkKZ>8k&*B&HtN2a)F8&aIioe9);vey^_)jG0?EVoCh=d}MNGy_wq#~I}E>eh; zB9%xj9u#TBLn5t6C(?@yBBRJ8GK+^r7LiqC6WK)$@rZa-LjCaQ}XqNb=NYKuCeuBa#Kiw2^hXe1hoCZefmCYp;DqNQjhT8lQKt!O9O ziw>fr=p;IeE~2Y=S#%TKMGw(aydrvu-lC7_EBcB4;#Dy~ye0;ULE?2WSiB+L6hp*N zF-!~>Bg9DYmUvr?5~IZ!F;=d8<54>dg+_Nvlm*Ok2OY9bV#9pya>=y^b*Ww#-P#hA6#Sw8-923XI zx8j62DNc#g;*2;e&WZElg7{8c6qm$haYbAe*Ti*kL);X%#BFg$+!go4_u>a}U;HS3 z5KlgKO{7Fk4AkxgV5Im9F4QIS(TCLR~LL~fBsVqT*>$OcWO-L`hLflon;gGvZlMRy-%liSpukQ9)D`mBb6;MNwH)5mm)Y zqME2KYKWSmmZ&Z2h`OSls4p6bhN6*ZESiX>qM2weT8NgSm1r&6h_<4gXfHa5j-r$3 zEV_uU;$_iIbQe8DPw|TAC3=fKqOa&D`iob^0P&g_CZi(CCj<_rCiSNY^;=cI*@pX^Um38aZhVzzf z+qP}nR?1E)m9lNywr$(CZQI{lr=5Lv@82_K|9SPhrGBI()#h9yYp(Z*4~UP5Pl(Tm zFNm**Z;0=RABdlbUx?p`KM0>zc>jY4f(VKTh6s)bfe48Rg$RuZg9wWVhX{{|fQX2Q zgouoYf{2QUhKP=cfryESg@}!agNTcWhlr0zfJlf)gh-4?f=G%;hDeS`fk=r+g-DG^ zgGh@=he(gefXIl*gvgA@g2;-hscj8fGCJ4geZ(Cf+&h8 zhA56GfhdV6g(!_EgD8tAhbWJzfT)P5gs6nOrL?=XNL>EL? 
zL^nitL=QwyL@z{dL?1+7L_b7-!~n!V#300A#1O<##4yBg#0bPl#3;mQ#2Ca_#5lxw z#011d#3aOI#1zC-#5BZo#0#5Tls#16zx#4f~c#2&<6#6HA+!~w)X#396C z#1X_%#4*Hi#0kVn#3{sS#2Lg{#5u%y#0A7f#3jUK#1+I<#5Kfq#0|tv#4W^a#2v(4 z#684)!~?`b#3RIG#1q6*#52Tm#0$hr#4E&W#2dt0#5=@$#0SJj#3#gO#23U@#5cru z#1F(z#4p5e#2}&G(z|y z8Y7w@nj)GZnj=~uS|VB@S|i#Z+9KK^+9UiB{)i5U07OSbAfgkZGolNkE20~sJE8}o zC!!akH=+-sFQOl!KVkr4AYu?=Fk%Q|C}J35IAR21Bw`d|G-3>5EMgpDJYoW3B4QF^ zGGYp1DqHK4Jl4Az~3?F=7c~DPkF7IbsE3C1Mp~HDV27 zEn*#FJz@i5BVrR`Ghz#3D`FdBJ7Nc7Ct??3H)0QBFJd2JKjHx5AmR|>FyaW}DB>96 zIN}82B;pj}G~x{6EaDvEJmLc4BH|L_GU5v2D&iXAI^qW6CgK+2HsTKAF5({IKH>r5 zA>t9@G2#j0DdHL8IpPK4CE^w0HR288E#e*GJ>mo6BjOX{GvW*4E8-jCJK_i8C*l|4 zH{uV%rw!i!Ac7!*B7z};BSIiTB0?cTBf=oUBEliUBO)LoA|fFoBcdRpBBCLpBVr(8 zB4Qz8BjO<9BH|(9BN8AIA`&4IBa$GJB9bAJBT^tzB2pnzBhnz!BGMt!BQhW|A~GQ| zBeEc}BC;W}BXS^eB61;eBk~~fBJv^fBMKl2A_^f2BZ?r3B8nl3BT67jB1$1jBg!Dk zBFZ7kBPt*&A}S#&BdQ>(BB~*(BWfUOB5EOOBkCaPBI+UPBN`wYA{rrl5seW|5KR%y z5X})S5G@g{5Umky5N#3d5bY6u2!BKeL;#{AA`sCD(HYSN(G}4R(H+qP(G$@N(HqeR z(HGGV(H}7YF%U5bF&HrfF%&TjF&r@hF%mHfF&Z%jF%~fnF&;4iF%dBdF&QxhF%>Zl zF&!}jF%vNhF&i-lF&8lpF(0u2u@JEcu^6!gu@tfFU%)$AmSbCiSczDLSdCbNSc_PP zSdZ9%*ofGK*o@eM*oxSO*pAqN*ooMM*p1kO*o)YQ*pE1XIEXleIE*-gIEpxiIF2}h zIEgrgIE^@iIEy%kIFGo1xQMufxQw`hxQe)jxQ@7ixQV!hxQ)1jxQn=lxQ}>%c!+p} zc#L?0c#3$2c#e31c!_w0c#U|2c#C+4c#rsi_=xy~_>B01_=@<3_>TC2_=)(1_>K63 z@M(*8K!_lSpon0I;D`{2kcd!-(1qKIOM;)oK6l891>(ugvMvWRks z@`wtEiik>x%7`k6s)%Zc>WCVMnuuD6+K4)cx`=v+`iKUIhKNQ8UqoX>6GT%)GemPl z3q(sqD@1EV8$??~J4AbgAHpBe0TF=chzLYK@3F~%L0m;#LtIDPK-@&!Lfl5&LEJ^$L)=F^Ks-b|LOe!1K|Dn~ zLp(>kK)gh}LcB)2LA*u0L%c_PKzu}eLVQMiL3~AgLwra4K>S4fLi|SjLHM-8`yWIQ zL{LO9L~uk1L`Xy^L})}9L|8;PM0i95L_|a+L}Wx1L{vmHM07+9L`+01L~KMHL|jBX zM0`X7L_$O&L}Ek|L{daDL~=w5L`p;|L~2ADL|Q~TM0!L9L`Fm=L}o-5L{>yLM0P|D zL{3C5L~cYLL|#NbM1Dj8L_tI$L}5e`L{UUBL~%q3L`g&`L}^4BL|H^RM0rF7L`6g; zL}f%3L{&sJM0G?BL`_63L~TSJL|sHZM14d9L_L94j~RBjv$UAjvt{|==t|6`?ZXj+VZXs?X?jY_W?ji0Y9v~hf9w8nho*0U{wH z5h5`n2_h*X86r6%1tKLP6(Thv4I(Wf9U?s<10o|L6CyJr3nD8b8zMU*2O=jT7a}(z z4k5jzk& 
z5xWq(5ql7O5&IDP5eE5q}Uqet7?b2!aTT2!;rb2!RNR2!#lZ z2!jZV2!{xdh=7QQh=hoYh=PcUh=z!ch=GWSh=quah=YiWh=+)eNPtL)NQ6j?NP$cM;} zD1a!4D1<1CD1s=8D26DGD1j)6D1|7ED1#`AD2FJIsDP-5sD!ADsDh}9sD`MHsDY@7 zsD-GFsDr4BsE4SJXn<&lXoT=ZG)6Q*G(|K+G)J^Rv_!N*v_`Z+v_-T-v`6?M{1F`x z0f>%>Ktv}*XG9l7S41~NcSH|FPed<7Z$uwNUqnAdf5ZU9K*S)#V8jr_P{c6AaKs42 zNW>__Xv7%ASj4!00qHK4Jl4 zAz~3?F=7c~DPkF7IbsE3C1Mp~HDV27En*#FJz@i5BVrR`Ghz#3D`FdBJ7Nc7Ct??3 zH)0QBFJd2JKjHx5AmR|>FyaW}DB>96IN}82B;pj}G~x{6>|X@;@$o6$KCoHafOE|A z%nQtm%uCG6%qz^R%xlc+%p1&`%v;Rc%sb4x%zMoH%m>Ve%ty?}%qPsJ%xBE!%oog; z%va3U%s0%p%y-Q9%n!_u%umeE%rDHZ%x}!^%pc63%wNpk%s)&YfBJd;hZ%$!lo^Z} zoEd@{k{OB_ni+-}mKly2o*97|kr{~@nHhx{l^Km0of(4}lNpN{n;C~0ml=;4pP7J} zkeP^?n3;r`l$nf~oSA}|l9`H`nwf@~mYI&3o|%D}k(r5^nVE%|m6?s1otcA~lbMT| zo0*51mzj^5pILxekXeXXm|28blv#{foLPcdl39vbnpuWfmRXKjo>_reky(jZnOTKd zm068homqoflUa*dn^}ihmsyWlpV@%fklBdo%WTYS!feWH#%#`P!EDKF#ca)N!)(iJ z$868^WBM~YFaww!nSsns%+Aa%%&yFC%K*)%z?~7%)!hd z%%RL-%;C%t%#qAd%+bs-%(2XI%<;?#%!$lN%*o6t%&E+2%<0S-%$dwt%-PI2%(={Y z%=yd(%!SNF%*D(l%%#j_%;n4#%$3Yl%+<^_%(cvQ%=OF-%#F-V%+1U#%&p9A%|#%-zgA%)QKg%>B#*%!ABB%)`th%%jX>%;U@x%#+Mh%+t&>%(KjM%=63(%!|xR z%*)Ix%&W|6%%$v+x%-hU6%)88c%=^p-%!kZJ%*V_p%%{v}%;(G(%$Lkp%-75} z%(u*U%=gR>%#X}Z%+Jg(%&*LE%kj?6%2CuV167iL#xH)eNc4`xqhFJ^CMA7)=>KW2aC0OmmEAm(7^5av+kFy?UP z2yGA?9J`5#~|mG3IgR3Fb-WDduVB8Rl8$Ip%rh1?ENOCFW)373Nju zHRg5Z4dzYeE#__J9p+u;J?4Gp1Li~KBj#h~6XsLqGv;&V3+7AaE9PtF8|GW)JLY@l z2j)lSC+2777v@*yH|BTd59UwiFXnINAEr+L-~Td$FoQCKF@rNhFhep!F+(%MFvBv# zF~c(>Fe5S}F(WghFrzY~F{3kMFk>=fF=I31Fyk`gG2=56FcUHpF%vVBFq1NqF_SY> zFjF#9F;g?sFw-*AG1D_MFf%eUF*7r>FtakVF|#vsFmp0^EXF!M6=G4nGEFbgsZ zF$*(`FpDyaF^e-xFiSE^F-tScFv~K_G0QV6Fe@@EF)K5xFsm}FF{?9cFl#bvF>5pH zFzYhwG3zrMFdH%(F@2ehnN65Yna!BZnJt(tnXQxq-Qnxrw=%xrMovxsAD5ttE~ zk(iO0QJ7Jg(U{ShF_6q!68JHQFnV6ZGS(sUw*_hdxIhZ+_xtO_`d6;>b`Iz~c1(*eyg_wnzMVLjI z#hAsJC730drI@9eWte4|<(TD}6_^#7m6(;8RhU(o)tJ?pHJCM-wV1V;b(nRT^_caU z4VVp?jhMd7#>^(nrp#u{=FAq%mdsYn*334{w#;_S_DnyfKeGcffZ355$n3=I%vPi8M>Z)P86UuHjMf93$@K;|IkVCE3!Q06e^aOMc+NaiT! 
zXyzE^Smrq9c;*D=MCK&sWabp+ROU41bmk1^Oy(@+Y~~#1T;@FHeC7h?LgpgoV&)R& zQsy$|a^?!=O6Dr&YUUc|TIM?Ddgcb^M&>5wX66>=R^~S5cIFP|PUbG=Zss25UgkdL ze&zw@LFOUmVdfF$QRXq`apno;N#-f$Y33Q`S>`$BdFBP?Mdl^uW#$#;RpvG3b>3UFJRJedYt^L*^sqW9Ad)Q|2?~bLI=?OXe%)Yvvo~Tjo3Fd*%n`N9HHy zXXY2?SLQe7cjgb~Pv$S?Z{{DSPaxj`GlMXLGJ`RLGea;#GD9&#Gs7^$GQ%;$Gb1n~ zG9xh~Govu0GNUo0Gh;AgGGj4gGvhGhGUGAhGZQcqG7~WqGm|irGLtcrGgB~AGE*^A zGt)5BGSe~BGczzVGBYtVGqW(WGP5zWGjlL=GIKF=GxIR>GV?L>GYc>aG7B*aGm9{b zGK(>bGfOZ_GE4nslz*RuQK3fZVxmyoU(fWzjU$gVhKV*@v=O3>6m67fqeUAd+E~%X zi8fxe38GCDZIWn{MVlhpRMDo1HeIwCqRkX-mT0p@n(dLOZU$h0HEfj5$Xp2Q# zBHB{XmWj4pv=ySQ6m6Agt3_KQ+FH@piMC#}4WexnZIfu5McX3UR?)VJwq3LxqU{uI zmuS01+auau(e{b9U$g_F9Te@5Xop2RBHB^Wj)`_$v=gG86z!B~r$sv>+F8-giFRJJ z3!+^V?UHDhMY|%}Rne}Ac3rd^qTLkjmT0#{yCd3N(e8{}C;SXhB5_CR%XOLWmYpv{0gj7A=fuVMPlkT6obSh!#<_NTNj+EsAJS zMT;g{bkSmn7E`oXqQw?1j%aa3izixq(GrN3P_#s%B^E7-Xh}s&CR%dQQizsPv{a&{ z7A=ivX+=vXT6)nkh?Y^bOrm8LEsJPbMaw2ycF}T(mQ%D`qU9DXk7#*C%O_fX(F%xG zP_#m#6&9_CXhlUUCR%aPN{Cicv{Is#7OjkEWko9|T6xhbh*nXwN}^R3t%_(>MXM%S zbP_#y(`HI$9v?ii86|I?Q%|&Y=T1(MdiPl=Q zHlno^t(|D?Me`HQU$hRQ1&G#Bv_R21iPl-PE~0f6t($1wMe8A2Ptkgb)?2hbqV*N6 zpJ@F>8z9<1(FTb&ShOLc4Ha#eXv0MtA=*gMMu|3Bv@xQM6>Xeo<3*bw+CXhp>qXlj+D6efiMCm^Euw7|ZJTJ@McX0TPSJLWwp+A4qU{xJpJ@9< zJ0RLY(GH1rShORe9Tn}EXval6A=*jNPKkC}v@@ce744j8=S909+C|YWiFR4EE23Q$ z?V4!UMY|!|P0?W)S{&kEv;zjL`yGP2GKH# zmPxeCqGb^+t7zFo%Pv|D(Q=BGOSIggnU0<(Rz#4N3_18^%JeXXahtWDB2*=28%XC zw4tI66K%L?BSaf1+9=URi#A5Iv7(I=ZMQRx@a>*n?7K^q-w56gg6K%O@D@0o<+A7gji?&9zwW6&PZM|q4 zMB6CZCeb#Fwnen9qHPmxyJ$N^+bP;E(RPcrN3^}7?GtUkXa__)DB2;>4vTg~w43CDAU6c15(SqFod1x@b2U>qJ0zXyJ$Z|`zhKl(SD2eM>L-<;`~pvAfg2oEtqJ*MGGNXNYO%x7Fx70 zqJnK{F zXq`msELs=Qx{B6KwCnyH5YdK;HcYhP zqKy!3q-di=8!g%x(Z-54PPFl&O%QFOXp=;nEZP*&riwOAwCSSF5N)PtvqYOM+8oj5 ziZ)NQ`JyclZJ}t3L|ZJ{64925woJ6;qOA~ZrD&@}TP@le(bkH#PPFx+Z4hmvXq!aa zEZP>)wu-h*wC$qp5N)SuyF}Y9+8)vNindR*{h}QZ?VxCfL^~|n5z&r{c1*P6qMZ=! 
zq-du^J1yE7(awr?PPFr)T@dY}XqQC0EZP;(u8MX|wCkeX5bdUDw?w-w+8xpEigr)5 z`=UJ%?V)IoM0+gS6VaZE_DrmXWyXdOih6s?nJoki;+T36A!iPl}T z9-{RWt(R!MMe8G4U(x!B)?c&%q74*nkZ6NN8zS0J(T0gOT(l9QjTCK^Xro0NBidNe z#)&pwv6q+E&rFiMCy|9ir_N zZI@`fMcX6VUeWf6wqLXZq8$|NkZ6ZRJ0jXq(T<6BT(lFSofPeqXs1OxBidQf&WUzj zvxOiS}KzAENyf?U!i3 zMf)R~Pd9P?Ct48Ef{GSQwBVwJ5G|x=p+pNUS{Tv7iWW|^@S;T!Euv_VM2jq16w#uJ z7EQG1qQwv`rf9K5i!E9l(c+30Pqg@=B@iv4Xo*BiELsxLl8Tm0wB(|t5G|!>sYFXH zS{l*Pik42a^rB@DEu(0eM9VB%7SXbbmQA$mqU8`Rr)arE%Pm?S(ejFxPqh4^6%ehU zXoW;8ELsuKii%cDwBn+b5Ur$Wr9>+&S{c#GidIgv@}gA`t)gg^M5`=X7164SR!y|( zqSX+srf9W9t1Vg`(dvp;Pqg}?H4v?#XpKbk6|J#oO+;%dS~Jm_i`GK4mZG&1t+i-v zL~AQrJJH&U<|mrJXdOfg5UrzVfueO1t+QxdMC&SAH_^I_)nmD6 z(fW%vK(v9P4H9jzXhTFBD%vp7hKn{rw2`8X5^c0-V?-M(+Bnh1i#9>DiK0ysZL(-n zM4KwwG|{GuHbbw`hMv^XV?m|3nKST2Rq~ zi56V65Tb+ z5~7t9t(0h`MJpp(S<%XgR$jCUqE!^Fl4zAht0G!e(W;47U9=ja)fBCkXthPFBU)Y2 z>WNlgv<9Lz6s?hHzM?f2t%+z&MQbKnbJ1Fe)>5=qqO}&Sjc9E}YbRQJ(fma77p;S6 z0ityjEl{*hqIDLni)dX%>n2)v(Rzs1Q?y>9^%kvZUifB_sn@sC)$0{9*Fi(v`3;n7VU{>Pepqs+H=ufi1t#nSE9WZ?Tu(}MSCaOd(l3K_EEG? 
zqJ0+ai)dd(`zG3V(SC^bQ?y^A{TA(yXg)o}`JZS(L<=ffFwugG7DBX;qJO z3oBYU(ZY)sL9~dXMG`HtXi-FqDq1wrqKg(ow3wpB5-ql9aYTzNT0GI>il5-qoAc|^-AT0YV8i&j9if}#}?t*~fCL@O#PRzkFrqLmV@^uNvf6V}Rz zuPiHCInm0CRzb9iqE!;DvS?LAt14PG(W;A9L$sQr)e^0?Xmv!ZD_T9#>WkJuw1%QJ z63th%#-cS5t*L0uL~AZu3(;DN)=IS2qO}pNt!V8;YcHCgX#S#g5G_Eoj-myM)=9L^ zqID6it7zRs>n>Uk(Rzy3OSImi^%1SFX#GU%FWLan28uRFw85ec5pAew!$cb{+6d7` ziZ)8L(V~qJZLDbHL>n*K1komnHc7O}qD>KPs%X+MHVfJXi-IrCR%jSVu%(~v{<6W7A=lwaYc(KT71zGh?Y>aM4}}YEs1DJMN1}H za?w(VmQu7-qNNrsjc92_OD9@-(K3jZQM62=Wfm=qXjw(eCR%pUa)_2wv|OU)7A=ox zc}2@7T7JyPAqBRw*nP|;L zYav=o(OQYtTC_HzwH2+MXzfMw6U|?=4x$B!)={)T(K?CNS+p*qbrr3fXx&BYAzDw- zdWqIsv_7Ks6|J9W{Y4uf+Cb3;i8ffYA)*ZxZJ21oMH?a7NYO@#Hd?eXqKy@8oM_`k zn;_am(I$yDS+pslO%-jLXwyZTA=*sQW{Eaiv^k>96>Xkq^F>=A+CtG5iMCj@C88}A zZJB7xMOz`-O3_w{wpz3`qOBEeooMSt+aTIT(Kd;;S+p&pZ53^sXxl~GA=*ySc8RuI zv^}Ej6>Xnr`$an-+Ck9{iFR1DBcdG@?U-oCMLQwdNzqP;c3QMEqMa4(oM`7oyCB*{ z(JqO0S+pynT@~$`XxBx%A=*vRZi#kVv^%2R744pA_eFak+C$MEiS}5uC!#$S?U`uL zMSCIIOVM75_FA+zqP-REooMex`ykp!(LRaxS+p;reHHDSXx~NqA=*#Teu?&5v_GQx z^b+TPq6HBxsA$1N3ocp+(L#zAO0>|Tg%K^RXyHT)FIoiAB8nDCw8)}G5iP1{(L{?b zS`5)*iWW<>*rLS|Ev{(sM2jz40?`tRmPoY3q9qY6sc6YWODzP z6Ro{yexmt{)wJw)p%S})Ohi`GZ9zM}OLt-ojk zL>nmDAkhYkHbk_cq74&mxM(9p8!6f-(MF3lMzpb_jT3FWXcI)6DB2{^CW|&jw5g&^ z6K%R^Genyy+APs#i#A8JxuVSzZN6v=L|Z7@BGDF$wnVh0qAe3`xo9gyTPfNq(N>GL zMzpn}trKm%Xd6V^DB32`HjB1Jw5_6T6K%U_J4D+l+Ah&{i?&C!y`t?CZNF#-L^~+j zA<+(tc0{zJq8$_MxM(LtJ1N>J(N2qYMzph{ofGZ6Xct7gDB2~_E{k?Ww5y_B6YaWa zH$=NB+AYy;i*`q}yQ19_?Y?LaM0+UOBhem<_C&O&qCFGsxo9s$dnwv0(O!%8Mzpu0 zy%X)dXdguTDB35{K8yB6w6CIl6YaZbKScW}+Aq<5i}pt}pWfp9PqZMS1r;rrXu(Ab zAzDb$LWveyv@oKD6)l`-;YEueT13$zi56M3D56CbEt+W2MT;R?OwnSA7F)D9qQw<0 zo@nt!OCVZ8(GrQ4ShOUfB^521XvsxOAzDh&Qi+yYv^1in6)l};=|#&RT1L?_iI!Qk zETUxqSY0xo@n(& zYam)f(He>7D_UdGnuyj^v}U3;7p;Y8Ek$c3T5HkTh}KrLcA~Wx%}+Fc(K?70AX-P! 
z0!8a2T4&L^h}KoKZlZM;t%qnmMe8M6Z_)aQ)>pKCqV*STfM^3n8zkCb(T0dNRJ38D z4Hs>MXd^`%CE94w#)vjnv~i-17j1%Q6GfXO+GNqDh&ENUX`)RRZH8zwMVlqsY|-Y3 zHdnNHqRkg=foKawTO`_I(Uyp|RJ3KHEf;NtXe&iqCE9Ay)`+%Nv~{Ab7j1)R8%5hB z+Gf$Vh_+R1ZPD(Cc2~4} zqTLtmfoKm!dnDRp(VmF*RJ3QJJs0hTXfH*3CE9Dz-iY>Aw0EMt7wv;+A4U5l+Go+e zi1t;qZ=!t{?T2VTMf)Y%Z_)mU=F>-<|A`hvw4kB|6D_!CAw&x)S}4&%ixx(-u%d+% zExc$EM2jd|B+(*^7DcqEqD2!ex@a*(iz!+x(PE1hN3^)2#S<;QXbD71C|V-X5{s5Z zw4|aX6D_%DDMU*tS}M^}iYiq=lF_M-WT<}X?Y(E>#4C|aOsokZ&_T9>~q zjE|2`Q9u7~<$VJKT6GQR>e#wDw(gFthhyvM*m^m(-j1!0W9#eK`Z>1#j%|Qr8|c^u zIkv%$ZHQwV>ez-kw&9L#gku}&*hV?F(T;75V;k$(#yPg}j%|Wto9NgkIkw4;ZHi-? z>e!|^w&{*-hGU!Q*k(Dl*^X_FW1H*P<~g?cj%|TsTje!Y!w&jj( zg=1Uk*j72V)sAhAV_WOk);YHIj%|Zu+vwOfIkwG?ZHr^u>e#k9w(X8>hhy96*mgO# z-HvUKW83T4_Bpowj_rVBJLuRBIkv-&?TBML>e!Asw&RZNgkwAD*iJdN(~j+oV>|2E z&N;U8j_rbDyXe?1IkwA=?TTZ&>e#M1w(E}VhGVe!w+w&#xRg=2f^*j_od*N*LtV|(k^-Z{4Sj_reE`{>v{IkwM^ z?Tcgk>e#+Hw(pMZhhzKc*nT;--;V8%WAo{2uUY?bY(X4bP{$U`u?2T*Askys#}>-5 zg?4OV99vk&7S6GScWe=6#dd6Q99vw+ z7SFN8cWen9TSCW{$gw4MY)KqjQpc9eu_bqGDI8l$$Ck>mrFLv-99vq)md>%IcWfCP zTSmv0$+2a2Y*`#zR>zjjv1NB`IUHL~$Ck^n<#ud&99v$;md~-}cWeb5TS3QG$gvf6 zY(*SfQO8!yu@!f0B^+Bx$5zU*m3C}p99vn(R?e}NcWf0LTSdoK$+1;-Y*idvRmWD% zu~m0$H5^+_$5zX+)pl%k99vz-R?o53cWezDTSLdz$g%l4w#JUFiDPT(*qS-E=8mm} zV{7TyS~<4Xj;)PjYwOtBIkxtW&CjvU$WDXpG{jCr?KI3z!|gP} zP9yC!%1)#0G{#P2?KI9#+Q6`P8;pC z$xfT?w8c(a?X=BK+wHW&PCM4bjD6+?R3sg=k0XCP8aQT$xfH;bj40r?R3pf*X?w}PB-mz%TBlL zbjMD2?R3vh_wDq+P7m$$$WD*#^u$h2?exq}&+YWWPA~2B%1*EC^u|ta?exx0@9p%# zP9N>`$xff`^u&F(f|KTl)@{d z^a`oGLTay&#w(=t3hBH;dasbdD`fNvnY=<~uaLznWc3Q!yh3)bki#qF^a{DWLT;~+ z$1CLZ3i-T3ey>o#D-`q!g}g#xuTaD*6!i+lyh3rWP{J#e^a`cCLTRs1#w(Qd3gx^) zd9P5xD^&CfmApb_uTaG+RP_qgyh3%aP{S+K^a{1SLT#^5$1BwJ3iZ4~eXr2KD>U>9 zjl6=dS7_`Nns|k#UZI&+XzmqSc!ic;p_Nx??G@U1g|=RyomXh@75uz{zgOts6#~3M zN3Rg*6*_r^&R(I5SLo^$x_O1}UZICq=;;-Dd4=9yp^sPS>lONWh5lY)fL9pk6$W{Q z!CqmAR~YIQhW!np|GkE*<=fT2TM6HefdRwqG{R0J?KH|xqwO@tPGjvf&Q9a)G{H_2 z?KH_wlkGIcPE+kP%}&$pG{a6a?KI0yv+Xp;PIK)v&rb90w7^aa?X<{Fi|w?;PD|~y 
z%udVgw8Bm+?X=2HtL?PLPHXM7&Q9y?w82gr?X<~Go9(p4PFwA?%}(3xw8Kt2?X=5I zyX~~cPJ8XN&rbX8bihsr?R3aahwXI4PDkx@%udJcbiz(2?R3gcr|opcPG{|O&Q9m; zbiqy+?R3dbm+f@LPFL-8%}&?tbi+&Q9;`^ubOa?exh`pY8O;PG9Zx%}(F#^uta+ z?exn|zwPwLPCot3Ma@5U3Sy_Ab_!;v;C2dOr;v6EWv9@8r(l1tznc05281DnC50n} zCq*DdBt;@cCPg7dB}F4eC&eJeB*h}dCdDDeCB-AfCnX>yBqbsxCM6*yB_$&zC#4{z zB&8yyCZ!>zC8Z;!CuJaIBxNFHCS@UIC1oRJC*>gJB;_LICgmaJCFLXKClw$SBo!hR zCKVwSB^4tT|Eu7Ce~(%;Yu7HI1hXWw6tgt546`h=9J4&L0<$8s60nVp!OnO&G&ncbM(nLU_2nZ203nSGdjnf;jknFE*unS+>vnM0UEnZuaF znIo7ZnWLDanPZq^nd6w_nG={3nUk24nNyfknbVllnKPI(nX{O)nRA$Pne&+QnG2W; znTwcnYWm?nRl3XnfI9YnGcu`nU9!{nNOHcna`NdnJ<_xnXj0ynQxeHneUkI znID)RnV*=SnO~S+nctY-nLn66nZKC7nSYo*1L&gnA7&6{P-ZY@aApW*NMLS`an zVrCL%Qf4w{a%KuJB_r{ zC_9a|(-=FAwbM8|jknVTJ599HBs)#E(-b>RwbL{^O}EnwJI%DyEIZA%)11Fkxc_|} z_J97ruk!D?#$4w*&$-Tbt_z&&Lg%{3xh{6DOPuRc=eo?fE_bdgoa;*Gy2`n(cCKrj z>ssf!&bh94t{a@|M(4W8xo&o@Tb%1w=eo_gZg;Lboa;{Ky34ulcCLGz>t5%&&$;e* zt_Pg!LFanNxgK_|N1W?X=X%V!9(S%Moa;&Fddj(;cCKff>sjY|&bgjs{x1&$-@rt`D5+L+AR)xjuHTPn_#h z=laaKK6kD!oa;;H`pUV!cCK&!u6#f9)=uy2^xjS%?DWx2pX~J6PG9Ww)lT2+^xaNB z{xkjec@97A^vh1a?exb^J_G;vTln99{vSI9u~Sex1+!CdJB6@QNIQkHQ)oMdu~S$( zg|kz5J4LWlL_0;YQ)D|uu~Sq#MYB_MJH@b5OgqK0Q*1lMu~S?-#j{gpQ))YRkTwjJ5{z*6+2b6Q#CtPw^I!})wEMBJJq&R9Xr*vQ$0J? zw^IW@?d>bL=$NPV?+E-%bncw9rnA?6lZUOYF4t?-cAme`>vqw4Ahpw34)n zw3@Vrw3f7vw4Stqw2`!lw3)Ppw3W1tw4Jnrw3D=pw41btw3oDxw4ZcsvOVX#D>uKkD#<`w#uIHTVdFOh;xn6Xxmz?Wm=X%AtUj4fU`_Iq+ zHPUs`4bn~0Ez)h$9nxLWJ<@&B1JXm%Bhq8i6Vg-CGtzU?3(`x{E7EJy8`4|SJJNg7 z2hvBuq7t&YKH_~^~57N)S`tKjB{9^uQ{$ctIqObjj8H5>>8H^d68G;#-8HyR2 z8HO2_8IBpA8G#v*8HpL08HE{@8I2j88G{*<8H*X48HX8{8IKvCnShy)nTVN~nS_~? 
znT(m7nSz;;nTna3nTDB`nU0yBnSq&+nTeU1nT45^nT?s9nS+^=nTwg5nTMH|nU9&D zS%6uPS%_JfS%g`XS&UhnS%O)TS&CVjS%z7bS&mtrS%F!RS&3PhS%q1ZS&dnpS%X=V zS&LblS%+DdS&vzt*?`%Q*@)@OY|L!JY|3oLY|d=KY{_iJY|U)LY|CuNY|r##`ZGKH zWte|2Y2cIR0vuaM#}??=Iytt^j;)Jh>+0CLIkxVOt%qal>DYQXw%(4dk7MiW*!nrP z{*G;cV;kt$206CDj%|oz8|v7GIkw@BZG>YR>DWd&w$YAljAI+?*v2`w@s4eRW1HyM zCONjrj%|u#o9fu6IkxGJZH8l;>DXpDw%Lwtj$@na*ycI5`HpRYV_WFh7CE-Xj%|r! zTk6=BIkx4FZG~f7>DX2|w$+YpjbmHu*w#6=^^R?WW83K1HaWJDYETw%v|xk7L{G*!DTL{f_N`V>{^B4mq~Nj_rtJJL=euIkw}D?Sx}H>DW#= zw$qO7jAJ|N*v>h&^N#I;W4q|sE;+W#j_rzLyXx4kIkxML?S^B!>DX>Lw%d;Fj$^y) z*zP&D`;P5_V|(b>9yzwhj_rwKd+OMpIkxAH?S*4|>DXR5w%3mBjbnT3*xos|_m1s@ zWBcgXJ~_6}j_r$M`|8-fIkxYP?T2Ig>DYcbw%?BJk7M&0Y_FC7`Ma5yMuRxlpw2az za}DlXLpaxv&NY;C4eeaRIM=YwHJoz||91`cpO;1>kRp;Iks_0#kfM^Jk)o4gkYbWz zkz$kLkm8c!k>ZmQkP?y-krI=Vkdl&;k&=^AkW!LTky4Y=kkXRUkD2n4OB- zsf3+M+NqSCO53T7oyyv&oSn+sse+v<+NqMAD%+`wovPZYnw_fKsfL|u+NqYEYTK!f zo$A`Do}KF3sezpu+NqJ9eC^cOPEG98)K1Op)Z9)j?9|dut?bm=PHpVe)=us0)ZR{h zcJjAV2RjAWsiU0&?bOLmo$b`cPF?NP%}(9z)Wc3a?bORoz3tS;PJQjv&rbdAG{8;+ z?KH?vgY7iLPDAZ9%ud7YG{R0J?KH|xqwO@tPGjvf&Q9a)G{H_2?KH_wlkGIcPE+kP z%}&$pG{a6a?KI0yv+Xp;PIK)v&rb90w7^aa?X<{Fi|w?;PD|~y%udVgw8Bm+?X=2H ztL?PLPHXM7&Q9y?w82gr?X<~Go9(p4PFwA?%}(3xw8Kt2?X=5IyX~~cPJ8XN&rbX8 zbihsr?R3aahwXI4PDkx@%udJcbiz(2?R3gcr|opcPG{|O?(Y=*?|t3>JQeUf^8)iC z^Ahti^9u7S^BVIy^9J)K^A__q^A7Va^B(g)^8xcA^AYng^9l1Q^BMCw^9A!I^A+k&6^DpViGeNiqWd>sg zXNF*gWQJmfW`<#gWrkyhXGUN~WJY2}W=3H~WkzF0XU1U0WX58~X2xO0WyWL1XC`1K zWF}%JW+q`KWhP@LXQp7LWTs-KW~O1LWu{}MXJ%k#WM*P!W@ce#WoBb$XXaq$WaeV# zX69k$W#(h%|4Y~Bo)z#41-(KcuTa=46!8j0y+SdsP~0n&@CqfpLMg9M+AEat3T3@Q zIj>OOD^&0b6}>_wuTa@5RPhQ`y+SpwP~9uk@Cr4(LM^XQ+bh)Z3U$3gJ+DyTD>U#5 z4ZT7mui)zy8heE%UZJU1X!bXFd`@U{(OQVsQnXg0wHB?7Xl+GnCt7>a{6zB?t%GO* zqIDE4P_#~>br!A5-}Zl}L0w7RNZm<2NIgltNWDpYNPS8DNc~9zNCQcONP|g3NJB}( zNW)1ZNFzz3NTW$(NMlLkNaINpNE1nuNRvrZNK;AENYhC({^~z>i=6{zGG{SoGv_ep zGUqYpGZ*~j-&3gnJ`iD{ofg??v7MIKX{nu-*=f0*R@iB!omSatwVl@3X|0{s*=fC< zHrQ#Soi^ENvz@lsX{(*K*=f6-cGzjBop#x2x1ILbX|J93*=fI>4%q3SoetUQu$_+B 
z>8PEK+3C2QPT1+Bole>5w4Kh_>8zd3+3CEUF4*a!oi5qwvYoEj>8hQs+3C8SZrJIj zoo?Cbww>8_pb+3CKW9@y!jogUfgv7MgS>8YKb+3C5RUfAiSonG1LwVmGB>8+jK z+3CHVKG^Bw-^t@r^OIOKcWQ@EvRV0 zL<=rj2+=}{7D}|xqJGbu9}GdVK_GbJ+>Gc_{}Gc7Y6Gd(i{Gb1w-Gcz*_ zGb=M2GdnW}Gbb|_GdD92GcPkAGe5Hcvmmn&voNy=vnaC|vpBN^vm~<=vox~|vn;b5 zvpll`vm&z+vof;^vnsP1vpTZ|vnI0^vo^C1vo5n9vp%x{vmvt))0f$p*@W4Y*^Jqo z*@D@U*^1ek*@oGc*^b$s>BsbEc3=iDJ2C^AotT}OU6@^&-I(2(J(xY2y_mh3eVBcj z{h0lk1DFGugP4PvL;mvrd#~eA$2QEd4R>rK9NS38Hp;P$c5GuD+gQgo&asVmY!e*Y zM8`JCu}yYtQykk=$2QHeO?PZF9NSFCHp{Wic5HJT+g!)?|MmTjYD z+ho}`TedBhZL4M5X4$q|wjGvjr)Aq^*>+pDJ(g{+W!q=j_FJ|CmhGTrJ7n1oTec&X z?WkotX4#HgwiA}^q-8s0*-l%wGnVbFWjklt&Rez%mhGZtyJXoeTed5f?W$$FX4$S= zwi}l1re(Wj*=}35JC^OPWxHqD?pwA8mhGWsdt})jTec^b?WtvZX4#%wwilM|rDc0% z*5$mzki_Yb)aj7S>5$y%kizMZ z(&>=O>5$s#kjCkd*6EPW>5$&(kiqGY(dm%M>5$p!kj3ec)#;GU>5$#&ki+Sa)9H}Q z>5$v$kjLqe*XfYY>5$*)P{8R>(CJXf=}_3|P{ip_)ag*n=}_G1P{Qd@(&^GI&78WMQx9|MX->V&skb@x zF{i%f)X$vyo6`Vu8fZ>~%xSPW4Kb&o<}}QlhMUs}a~f$*qs(cvIgK%=vF0?+oW`5e z1aq2bPLs@OvN=sLr>W*N&77v2(+qQ(X->1uX|_4dF{ioaG|!yoo6`bwT4+v-%xSSX zEitF1=CsV5mYdTGb6ROmtITP&Iju3LwdS*9+&78KI z(++dmX->P$X}3A;F{i!ew9lOOo6`YvI%rOZ%;~T>9Wke)=5)-Qj+@g7b2@2Gr_AZJ zIh`@5v*vWpoX(rm1#`M+PM6H-vN>Hbr>o|4&77{A(+zXFX->Dy>9#rDF{iucbkCgb zo6`exdT36M%;~W?Ju#=J=Jd>*o}1GPb9!k`ugvMSYx;lRy?kTY-deVImhHV|`(W8V zTDDJ??XzY3V%ff0wr`g0yJh=f*?w9!w;|?#s}PpW-Li$WY#x>^lw}KT*}_=1u$Il! 
zvUypyaF#8+Ws6|hB3iacmMyYni(=WLTDEAGExKikVcB9@wpf-ewq=WB+2UHZc$O`` zWlLb$5?Z!ImMyVmOJdoQTDD}CExBb&VcAkzwp5lawPj0V+0t6Jbe1i>Wy@gMGFrAw zmMybo%VOEGTDEMKExTpQVcBw8wp^Aiw`I#?+45Spe3mW0Wh-FW3R<>8maVX5D`MG- zTDD@At+-_?VcAMrwo;a@v}G$}*~(hBa+a;UWvgJ>Dq6NmmaVd7t76%zTDEGIt-58a zVcBY0wpy00wq>hh+3H%hdX}xeWouyB8d|nSmaVa6^R{eFEL&5{*37asw`?seTT9E< z%Cfb#Y;7!CTg%qYviVrH_Lj}pviVuI4wlW|vISVSK+D$AvISYTPL?g$vURp>T`XHy z%ht`Zb+>FiEL%^@*2}W>c5R-npJd}5?AN)ZcR-+jA9Lz!PW{ZOzc~#sr-9})$eae7 z(-3nSYEHwxZzBw&0r-kOU$eb3N(-L!9YEH|{X}LMAFsGH~w91@T zo6{O|T5C@0%xS$jZ7`>e=CsM2Hk;EHbJ}W7+stXZIqfi~o#wR5oOYYj9&_4jPW#Mh zzd0Q+r-SBn$ea$F(-CtzYEH+@>9{$aFsGB|bjqAgyC$z+kN5H~+nlj%XD!<~%XZ$f zU9fByE!!o_cGvfZ(4cP-mJ%XZ(gJ+N#KE!!i@ z_W0NK>nC}6@pt%%<$w9qvOTkG&n?>v%l6W;y|QetE!!K*_SUk!vuy7z+Xu_`(XxHA zY@aRL7t8k5vVF5`-!0n@%l6Zme-{&H*jms_*H+?xO8*5WU> zmVddm`pd2LUv6#wa%=mSTf4v9eExE4|CgKZUv7SXxpnx<&HpdA0N3rm9{~|ab|izy zPGm6Ine0M#CA*Q`$sS}+vKQH#>_he?`;q<00pvh(5IL9}LJlQ|k;BOm{0<5xJOLLM|njk;};y z&Xq|MsgFmncPBdCAX2=$sOcQau>Oq+(Ygq_mTU_1LQ&S5P6t9 zLLMcLk;lmsnh_)doePKSg}heS??#ID2drxZc{NywyRGBP=tf=o%KB2&9)d%REMbV%!T zNau7&?{vuEbjav*$mDd$>~zTDbja#-$mVp&?sUlEbjay+$mMj%?R3cFbja&;$mev( z?{p~ObSUU_DCBe~>~tvNbSUa{DCTr1?sO>ObSUX`DCKl0?Q|&PbSUd|DCcx2?{ui( zbg1ZbsN{60>~yH&bg1fdsOEI2?sTZ(bg1ccsO5C1?R2Q)bg1iesONO3?{sM3bZF>w zXykNg>^l5@y!Y|_zq_`;dLfeq?`g06CBxL=Gm0kVDB~?xO zkVna5eN0uimkQK>FWM#4nS(U6t zRwrwaHOX3JZL$tom#jzDCmWCr$wp*j(wl5THYJ;p&B+#IOR^Q&nruV1CEJlcWP8$= z^dmcv{$v0dNOmNH$WCN1*_rG@b|t%!-N_zgPqG);o9sjOCHs;6$pPd*au7L~96}By zhmpg{5#&g66giq4LyjfKk>kk;+2)5#g+OmY@Eo18<=CFhaz$pz#> zauK|+^^@)7x%d_q1YpOMeW7vxLw75SQcL%t>7k?+Y5eN0uim zkQK>FWM#4nS(U6tRwrwaHOX3JZL$tom#jzDCmWCr$wp*j(wl5THYJ;p&B+#IOR^Q& znruV1CEJlcWP8$=^dmcv{$v0dNOmNH$WCN1*_rG@b|t%!-N_zgPqG);o9sjOCHs;6 z$pPd*au7L~96}Byhmpg{5#&g66giq4LyjfKk>kk;+2)5#g+OmY@E zo18<=CFhaz$pz#>auKoeW8OkfF%XWEe6m=}CH# z;mGi01TrESiHuA}A)}Jf$mnDYGA0>|j7`QNIelvouFW-Gt>p@3U!0JLp`9LP%o%A)CcMd^@I9D1E7J>AZRc&1R4qrgN8#Rppnoh zXf!ki8Viks#zPaJiO?iyGBgF63QdEiLo=Y6&@5;+GzXds&4cDc3!sJ2B4{zR1X>C$ 
zgO)=ppq0=nXf?D3S_`d%)GgN{Qdpp(!k=rnW&It!hH&O;ZVi_j(LGIRyH3SEP)LpPwC&@JdTbO*W% z-GlB!51@z8Bj_>o1bPZRgPub#pqJ1q=r!~PdJDaS-a{XtkI*ORGxP=e3VnmVLqDLO zklQGIZV>`6jT&a6jl^b6jc;c6jzi`lvI>blvb2c zlvR{dlvh+xR8&+_R8~|`R8>?{R9Dnc)Kt_`)K=6{)K%0|)K@f6G*mQFG*);knkbqo znkkwqS}0m7S}9s9+9=v8+9`Y#?G?TXKSc+Hzal^psOYE&Qgl)TD>^H>D7q@TDY`3q zD0(V-DS9jVDEcb;Df%l0CAOfDHbc1D3&UgDV8f%xDft-pHBn* zz1s))`35xiUuj)eS=ZIpb&YjhYhBk_*Y(zQgLU0#T{l_R&DM2`b=_)Rw^`Ti)^&$< z-DzESS=Zgxb&qx3YhCwQ*ZtP@fOS1+T@P8;!`AhPbvcI$=&H z&FPdmoi?X4=5*Ga&Y9DBbGl$o7tQICIbAlVE9P|7oUWPEb#uC5PB+czmO0%vr#t3! z*PQN|(|vP#U``Ls>5(}-Hm4`%^wgZ5nbUJ~dSOm4&FPgny*8&e=JeK_-kH;TbNXOT zAI<5LIej*#FXr^soW7aUcXRq-PCw1bZM5-cJA^s8n^Q=0@-U}R<`mkT!kAN7bMiDN zFLMfKPT|cdf;mMrr%2`$*_@)7Q&e+`W=_$~DTX=4G^beR6x*EQm{VMHif2yo%_)I7 zB{Zi*=9Ji+l9*Fcb4q4T$;~N+Ii)nGROXc0oYI(6T60QgPU+1lgE?h1r%dLQ*)_Rd z(i80Z2j2X%kXgxWWOgzKnUl;#<|gxydC7caezE{rkSs(NCX0|o$zo)2vIJR@EJc=awj^7Tt;sfITe2PLL$)VcpIksLBo~p3$tC1cav8atTtTiRSGnkVJzH=y-$4J>&>Cnh zv<_MiZGbjHo1o3m7HBK94cZRvfObN=pxw|OXfL!6+7BIo4nl{Z!_X1vD0B=u4xNBb zLZ_h9&>83~bPhTXU4Sk^m!Qkg73eB-4Z059fNny!pxe+L=q_{*x(_{o9zu_x$Iuh# zDfG;xf4+`w+1$s+|2g@Bd`Z3{Uz2ahx8ytWJ^6wBNPZ$elV8ZMSY*WJEF&8JUbiMkS+>(a9KOOfnW3n~X!oCF7Ct z$pmCVG7*`WOhP6llaa~E6l6*=6`7h$L#8Fuk?F|{WJWR*nVHN&W+k(c*~uJaPBIsn zo6JMzCG(N_$pU0SvJhFAEJ79~i;>025@boT6j_=qLzX4Wk>$w>WJR(PS(&UtRwb*E z)yW!UO|lkQo2*0DCF_y($p&OYvJu&s^d_5-P040tbFu~5l59n`Cfks0$#$d<*`D+z z{m2faKN&yl1-X)3MXn~-kZZ|xHyl*A9k=w}~ zi{vHp zGI@o(N?s$clQ+nl%5N`521lRwCxq}y0|1{~xcf^;WCk{)CzGBg>63`=^FUSv2jJQ;zE zNJb(flTpa1WHd558H0>T#v)^ramcu2JTg9+fJ{gxA`_EI$fRU4GC7%oOi899QbNM<54lUc~DWHvH8nS;zp<|1>GdC0tEJ~BU9fGkKBA`6p6$f9I1vN&0S zEJ>CkOOs{DvSc~3JXwLPNLC^%lU2y7WHqunS%a)e)*@?@b;!D8J+eO8fNV%MA{&$5 zWD~L}*^F#Xwjf)Qt;p778?r6gj`ShhlfI-M*@5&Y1IR$KBN;??B7@1!WEZk4*^TT@ z_8@zby~y5VAF?mmkL*tlAP16z$id_gaws{B98QiPN0Ot+(c~C%EIE!GPfj2wl9R~E z

r$IgOl7&LC%!v&h-x92bAThik54o?^aYfnuRzkz%o8iDIc@nPRzOg<_>*m14DG zjbg20onpOWgJPp%lVY=Ci(;!{n_|0ShhnEau9iQ=i^nc}(Ph2o{+mEyJHjpD5f|Ggmb9r>R8Kz<}Yk)O#gTB8G(#QMj|7VQOKxdG%`9FgN#YWB4d+r z$hc%YGCrArOh_go6O&2Eq+~KOIhle?Nv0xGlWEAbWI8fEnSsnmW+F3_S;(wpHZnVz zgUm_hB6E{@$h>4eGCx^>EJzk23zJ32qGYi@{PlOvLH@$w> zWJR))i+^5pRvD@SRfVcS)u9?tO{f-B8>$1 z5^4pthT1@Fp>~iD)E@GM{GbkyKNR57?-xl0`3I66$sn>58BBI2yO3SUZe(||2ieob zKOd{Tpx#g)s4vtH>JJTo210|N!O#$BC^QTj4vm0DLZhJ3&=_beG!7aMO@Jmslc34a z6lf|m4Vn(kfM!CopxMwIXs%1YKURJG=aKWt1>{0<5xJOLLM|njk;};y&Xq|MsgFmncPBdCAX2=$sOcQau>Oq+(Ygq_mTU_1LQ&S5P6t9LLMcLk;lms zw~z9rw0@5vA3NAeT-nfyY2CBKp1$sgoT(rr8*azl{rWJuD3 z3`K?}!;oP~PtuDFM}{XOkP*p9WMnc58I_DiMkiyCG09kDY#0Ch4C6RZTqqtCA4&iv zgc3oCp(IdJC>fL-N&%&WQbDPqG*DV79h4r*0A++SL7AZ}P*x}#lpV?e<%DuUxuHBz zUML@wA1VM9gbG20p(0RGs2EfnDgl**N7HS9iKSH{7wQM~hXz0cp+V4KXb3bE8U_uA zMnEH>QP5~;3^W!R2aSg&Kog-!&}3)|G!>c#O^0ScGoe|~Y-kQN7n%pnhZaB!p+zqJ ze(DZn&?T4t`*z!9 z@(OvCyhdIpZ;&_1TjXu>4tbZnN8TqNkPpd6B!e~>>(w+Z;T3PHM)AxRH16d9TfLxv?iNiQ-S8J>(lMkFJV zk;y1zR5BVFos2=oBx8}W$v9+OG9DS9Oh6_i6OoC@BxF)D8JV0+L8c^Ak*UcvWLh#E znV!r*W+XF_naM0RBHiXxJGq10N$w(dlY7X$r{B2SZN$g|`*@;rHgyhvUmFOyfutK>EEI(dV9ILIF{h{I^vs-|o6`$(dTCCt%;~i`y)mb^=Jd{--kZ}0bNXmbpUmmAIejsw zujcg4oW7gW4|DoyPHq#8kBJcG_YoV?5_oH>PeO~$`g z5W%`ew62k?Yh>#h#kxkduFl(+p# z#yAm_7)k;qg_1$Zp%hR`C>4|%N&}^Z(n0B=3{XZW6Ox?p@^FwMG7 zx2`j+>rCr9%ev0Cu5+yGT$=FgF1D^qtm{(iy3D#Rx2`L!>q_go z%DS$$u4}C8TI;&bx~{jb8?5U_>$=IhZg#D|KT-PlZy~pm+sN(Y4ss{Ci`-4_A@`E| z$o=F2@*sJLJWL)TkCMm8(1dK^-7}C;$qCIzmBECny-|40VCJLfxS5P!Fgl)C=kj^?~|A z{h&aQ{l%T;w9A}!o6{b1+G|ex%xS+l9WbYZ=5)xM4x7^vb2@5H$IR)tIh`=4 zljd~FoKBn58FM;oPUp<&yg6Mkr;Fxv$($~m(-m{NYEIY8>AE@HFsGa5bjzG>o6{Y0 zx@%7N%;~;4Jus(-=Jd#%9-Gq>b9!n{&&=t$IlVBam*(`!oL-yL8*_SVPVda=y*Yhw zO~1b<^HK3h@mcXj@m29n@m=vl@l)Y8S^gh{P`E2XDm)aS6rmMi6k!#f3NJ-CMR-L7 zMMOm;MPx-3MN~yJMRY|BMNCC3MQlYJMO;NZMSMj9MM6a)MPfw~MN&mFMRG+7MM^~~ zMQTMFMOsBVMS4XBMMgy?MP@}7MOH;NMRr9FMNUO7MQ%kNMP5ZdMSevAML|U&MPWq| zMNvgDMR7$5MM*^|MQKGDMOj5TMR`R9MMXs=MP)@5MO8&LMRi3DMNLI5MQueLMO{Tb 
zMSVpBMMFg+MPr4xqKTrZqM4$(qJ^TRqLrexqK%@hqMgD=(O%)J@Kbb9_$vYwfr^fb zAVnudu%fe~i=wNdo1(j-hoYyVm!h|#kD{-lpQ68FfMTFxkYcc6h+?Q>m}0nMgkq#( zlw!1EjAE=}oMOCUf?}d#k_&(S25~Yp1)2&?gQi0>pqbDtXf`wlnhVW?=0gjhh0r2s zF|-6)3N3?{Lo1+_&?;y(v<6xWt%KG>8=#HQCTKIX1=j(UiUGxh zVnMN?I8a+Pa{e!W9*9SRMDhC?HukZn&?V?HbOpK!U4yPeH=vu)E$B9M2f7R0gYH8Qpoh>S=rQyJdI~** zo*{4)!&%qx)-{54jc8pXS=Y$cHHvkOYF(pQ*XY(YhINf;U1M3-*w!_U zb&cy<|9fjhJTg9+fJ{gxA`_EI$fRU4GC7%oOi899QbNM<54lUc~D zWHvH8nS;zp<|1>GdC0tEJ~BU9fGkKBA`6p6$f9I1vN&0SEJ>CkOOs{DvSc~3JXwLP zNLC^%lU2y7WHqunS%a)e)*@?@b;!D8J+eO8fNV%MA{&$5WD~L}*^F#Xwjf)Qt;p77 z8?r6gj`ShhlfI-M*@5&Y1IR$KBN;??B7@1!WEZk4*^TT@_8@zby~y5VAF?mmkL*tl zAP16z$id_gaws{B98QiPN4m(DPL49C(dIP9oW`2dICC0rP7};&qB%`6r^)6t#hj*^ z(=>CMZca1IX{I^NGN;+*G{>Cgn$tXUnr}`E%xR%HEi$LY=Cs6|mYUNtb6RdrE6i!7 zIju6M)#kLuoYtDtI&)fYP8-Z=qd9Fdr_JWH#hkX9(>8P3ZcaPQX{R~uGN;|5@5JHm57*bk&@$nbUQ1x?xT?&FPjo-8QE?=5*Jb?wQklb9!J-56$V3IXyO~ zC+76joSvD}b8~uOPA|>rl{vjOr#I&G)|}p%(|dFJU``*+>61BqHm5J<^wpfcnbUW3 z`e9B#&B<+=@kca-Ik}rtNOSTqr%>h;+ML3;rr(cGKK@}zPtuDFM}{XOkP*p9WMnc5 z8I_DiMkiyCG09kDY%&fRmyAcoClinf$wXvgG6|WKOhzUrQ;;diRAg#04VjiqN2Vt; zkQvELWM(o8nU%~&W+!uyImujPZZZ#eN0uimkQK>FWM#4nS(U6tRwrwaHOX3JZL$tom#jzDCmWCr$wp*j(wl5THYJ;p z&B+#IOR^Q&nruV1CEJlcWP8$=^dmcv{$v0dNOmNH$WCN1*_rG@b|t%!-N_zgPqG); zo9sjOCHs;6$pPd*au7L~96}Byhmpg{5#&g66giq4LyjfKk>kk;+2 z)5#g+OmY@Eo18<=CFhaz$pz#>auK|+^^@)7x%d_q1YpOMeW7vxLw z75SQcL%t>7k?+Y5jjT@AAZwDf$l7EbvMyPVtWP!| z8g$d+U)vNhR;Y)iHyeaQBtFX=~iApOYzGLY;@29ce}V6rpW zh3rapBfFD5$ev^`vNzd>>`V3|`;!C6f#e`^Fgb)AN)983lOxEHlP}1ZBfpbB$e*O!3_kxQ-N}%o z2N{YCO@<-ElAfd&8IBB3Mj#`Sk;uqo6f!Cqjf_slAY+oT$k=2YGAjjT@AAZwDf z$l7EbvMyPVtWP!|8g$d+U)vNhR;Y)iHyeaQBtFX=~i_(l9X zwEnyYkbz`JGKlO%29ur1E@W4-8`+)gLG~njk-f=2WM8r$*`FLh4kQPWgUKP}P;wYK zoE$-pBu9~>$uZ1)#0rEf^zl)fc>Tl$XlUFmz$_oW|5Ka_qX{aE^m^i%0)($A$|NWYYRCH-3Z zjr3dTchc{rKS+O+{v`cb`it~e>2K2CrGH5Oly;l>-^XwWX?N+6(jL;Gq(e)Gkq#^E zDeWa4PCC4F1nG#6Fr`q*F_$kxna}PCC7G2I-8_nWQsIXOYe-olQEsbPnm9(z&E_OXrc! 
zE1gd|zjOiVg3^Vg3riP~E-GD2y0~-+SG#_QPPVMSJ_D45N z7HS9iKSH{7wQM~hXz0c zp+V4KXb3bE8U_uAMnEH>QP5~;3^W!R2aSg&Kog-!&}3)|G!>c#O^0ScGoe|~Y-kQN z7n%pnhZaB!p+(SQXbH3wS_UnLRzNGERnTf^4YU?o2d#%TKpUY=&}L{0v=!P0ZHIP1 zJE2|BZfFm*7upBycj@=@AL)bAholcnACW#PeN6he^a<&c(x;?POP`TGD}7G-yz~X> zi_({*FH2vMzAAl9`nvQD>6_BGq;E^#k-jT^Px`*}1L=p-kE9<7UYWv;O-S4k7I>9a7puI+S#1 z=`hk^r9Gv+q{B&vmyY1-KVM%)gd#zap(s#PC>j(UiUGxhVnMN?I8axX=$^qqsazVMFJWyUJ zACw;|02PD^L4~0rP*JEDR2(V+m4r$`rJ*uVS*RRT9;yIUgepOmp(;>Ss2Wrqs^QYV zpHFK_*OIO+T}QgEbUo?%(hZ~=N;i^jEbT4bM7pVTGwJ58{_}X%0%{4hf?7juptevu z$Omc<`9gkB2gn}^aOvOQgFxwy(m~Rlq=Th9OLvj(D&0-GyL1ofp3=RfdrS9`?kn9- zy1(=Q>4DONqz6k6ksc~NOnSKV24mQT^Jiuev=~|fErpgr%b^v}N@x|d8d?Lbh1Nmq zp$*VRXcM#<+5&Bbwn5vW9nel_7qlDN1MP+OLHnTt&_U=BbQn4U9fgiT$DtF@N$3=G z8ae}=h0a0ep$pJOm;QY`yd-^D`ik^b>1)#0rEf^zl)fc>Tl$XlUFmz$_oW|5Ka_qX z{aE^m^i%0)($A$|NWYYRCH-3Zjr3dTchc{rKS+O+{v`cb`it~e>2K2CrGH5Oly;l_ z-|uw@X?N+6(jL;Gq(e)Gkq#^EDeWa4PCC4F1nG#R>3Y)jr5i{$lx`&5SlU~2}gS((R>vrTwHkNc&3%NC!%HboHOdxge+$6byBSxf+UHbRW!Z_*i(i5a7 zN>7rWEImbfs`NDJ>C!W#XG+hKo-I8`dam?5>G{$Nq!&srl3pymM0%<8GU?^gE2LLS zuaaIZy+(Si^g8MF(i@~VN^g?hEWJf~tMoSM?b17>cS`S)-YvaHdav|8>HX3Nqz_6T zl0GbbMEa=oG3n#dC!|kGpOQW;eMb7M^f~GC(ifyJN?($`EPX}#s`NGK>(V!*Z%W^i zzAb%6`mXdn>HE?Tq#sH@l71}xMEa@pGwJ8jFQi{ezmk3}{YLt&^gHSI(jTNhN`I36 zEd53LtMoVN@6tb{e@eT}`R}nYgtWVKNNEr0P|~5L!$^m9^`Fm=o{$$54hj!NfFeSX zpvX`ZC@K^UiVnqqVnVT?*ialOE))-n4<&#SLW!WnP!cF9lnhD^rGQdGsi4$Q8YnH4 z4oVMYfHFdvpv+JfC@Yi=$`0j#azeSF+)y4UFO(0;4;6q4LWQ8hP!XsoR17K(m4He@ zrJ&MK8K^8&4k`~-fGR?jpvq7cs47$qst(nFYC^T3+EATe`u88``{%WVb#bi+)rT5D z4WUL*W5^q70yTx2LCv8SP)n#4)Ea67wT0S2K2Uqe7xIHTK>knw6bN;Mf}l=NFw`09 z0(FJDLET+)cl+l(1wEvDO81iPE!{`DuXI1@{?Y@a2TBi;9xOdXdZ_d;>EW*a^LcUv zG!hyGjfTcRW1(@-cxVDN5t;-|hNeJMp=r=`m)!sN`!hp&rt~c7+0t{Q=St6$o-e&X zdZF|p>BZ7Zq?bxBlU^>pLVBh2D(ThIYoymouajOcy+L}T^d{-e(p#jrN^g_iF1sI#fK6=386$# zVkile6iNmqhf+W(p;S<6C=HYrN(ZHfGC&!jOi*Sh3zQYg24#nGKsljYP;Mv>lo!ee z<%bHm}g3^Vg3riP~E-GD2y0~-+>5|f=q)SVeakcB?H)r-=|BuQ-<)HFV 
z1*jra391ZLfvQ5)pz2T!s3uekstwhF>O%FP`cMO?A=C(J40%IMpr%kWs5#UEY6-Q1 zT0?E1wop6B2Wk)bLVi#O$R7%T0-=sj5Y!0@hB`xCpsr9is5{gH>IwCNdP9AnzED4? zKQsUu2n~V;LqnjU&@gB?Gy)n4je2=cU zr8h`#l-?x0S$d1~R_Sfh+og9%@08vpy<2*Z^j_(G()*2uQOr7uWdl)fZ=S^A3fRq1Qe*QIYr-;};3eOvmD^j+zD()Xnw zNI#T*B>h37oar9Vi2l>Q|BS^A6gSLtuk-=%*@ z|CDx{_uu1K2x)iekkTH~p`=4ghmj5|?J4ag9ZovDbOh;$(vhSiOGlB8DjiKax^xWb zn9{MNV@t=8j{CcReXSW!I=*xQ>4egWq!UXgkxnX|Ogg!A3h9)t_W0-X(XY=)si4$Q z8YnH44oVMYfHFdvpv+JfC@Yi=$`0j#azeSF+)y4UFO(0;4;6q4LWQ8hP!XsoR17K( zm4He@rJ&MK8K^8&4k`~-fGR?jpvq7cs47$qst(nFYC^T3+E5*+E>sVy4>f=qLXDut zkT=u>Y6>-jnnNw1mQX9GHPi-b3$=rMp!Se2I!v( zx(0qGz1z74TFY5BcPGcC}=b^1{w>EgT_M>po!2V zXfiYfnhH&Wrb9EJnb0g~HZ%vC3(bS(LkpmV&?0Ctv;rN2pk zm;NFBQ`&9*e}5K2NV`jil=hGgB^_EijC5FOPiZgdaMIzWBS=S-jwBsfI*N2u>1fi? zrDI6Pl#V4GTRM((T1@*3rE^H<&`K1d;7nCj}U0Aw^bW!PI(#54q zNSBl@C0$y&jC5J)a?<6cD@a$At|VPqx{7pF>1xu|rE5snl&&RRTe^;PUFmwR_W1q1 zs`^j^s3Ft{Y7BWpO`xVwGpIS#0%{4hf?7juptevu$Omc<`9gkB2gn}^fC8b8P!QA! z3WhpEU7)T|H>f+*1L_I&f_g)JpuSK)s6R9S8VC)7217%jq0lgBI5Ywp35|kALt~(^ z&^Ty3Gy$3jO@byvQ=qBPG-x_B1DXlVf@VW=pt;aIXg;(6S_mzI7DG#*rO+~HIkW;= z39W)wLu;V5&^l;6v;o=(ZGtvKTcEAbHfTGv1KJ7gf_6iDpuNyOXg_oSItU$t4ns$v zqtG$vICKI!`CI?ImiiQQ8ae}=h0a0ep$pJO=n`}px&mE=u0hwK8_-SY7IYiB1Kox0 zLHD5t&_n1E^cZ>qJ%ye@&!HF4OXwB!8hQi0h2BB$p%2hU=o9oA`T~80zCquiAJ9+8 zZGnEzA_U|Pg@inyP*7+n3=|gfguI||P_oEd~{bN z2bG5^Koy}%P-UnJR28ZQRflRoHKAHiZKw`Z7pe!(PpB8v8|nk~ zh5AALp#ji9Xb?0Q8UhW4hC#!j5zt6z6f_zd1C52oLF1tb&_rkwG#Q!#O@*dG)1evA zOlTG~8=3>nh2}x?p#{)FXc4p+S^_PFmO;y*70^m(6|@>!1Fd!G-|rBulU^^qL3*R~ zCh5)6Tco#2Z08pbrSC}JmA)r^U;2Uc zL+MA-kAL^CS3y0Iek%P;`nmKA>6g;4q+h$*^*6WwJi~qiy@lRE@1YOSN9Ysu8TtZ! 
zg}y=Gp&!ss$Zetiib6o{P)NuF3I&CR!a!jmPsj@j2Ze_sKoOxxP-G|y6cvgFMTcTQ zF`-ycY$y&C7m5ePhY~;up+rz(C<&AlN(Lo|Qa~x8R8VRt4U`s22c?HHKpCM-P-Z9# zloiSbWruP=IiXxoZYU3w7s?0ahYCOip+Zn$s0dW_xBhuXRtzc*m4He@rJ&MK8K^8& z4k`~-fGWE5?|Z>2NmrJxB3)IwnsjyP8qzhTYf0CZt|MJnx}J1>=?2mbr5i~%miCrz zBHdKFnRIjM7Sb)HTS>Q;ZX?}Rx}CI-bbD!EX+P->(*Du`(t*+)rGun9Ne4@JmhK|m zRl1vWcj+F|J*9g|_m=J>-B-Gwbbsjq(gURjNe`ADB0W@knDlVz5z-^2M@f&C9wR+g zdYtrl=?T&kr6);GmYyO#ReGBAbm*NiUXOBE3|4 zne=k$71ArES4ppyUL(C$dY$xo=?&5wr8h}$mfj-0ReGEBcIh3`JEeC?@0Q*py;pjl z^nU3B(g&pvNgtLzB7Ic)nDlY!6VfN8Pf4GaJ|lfr`keH6SO57+`vPJ`3KzE^g(0%9u^bmRkJ%*k@PoZbfbLa*15_$!_hTcGLp?A=G=mYc- z`UHK3zCd50Z_sz>2lNwiTcjUZAs}}sB;)~wfganv z=Y0F;xo((|aTgEhvbssROL<6nN_k0nOZi9@lPWG%LaL-xDXG#@Wu(eVm6IwjRY9tv zR3)j(QdOj?N>!8cm8vdPL&{I8rc^Dd+ER6->Ppp<@|UVF)j%phs-aXPsm4-Gq?$@K zlWHy%DAhtLNUEh&uvCats8lPdFk6{_XKW3(feknuj)2?3k#IY>J=_8A2zP=la1XLU<9p7+wM|g_pt0 z;T7;ocon=FUIVX%*TL)I4e&;I6TBJT0&j)4!Q0^-@J@Iayc^yF?}hil`{4udLHH1S z7(N0Yg^$6<;S=yl_!N8^J_DbH&%x*63-Cqw5_}oH0$+u%!Pns%@J;v@d>g(4--YkN z_u&WdL--N=7=8jjg`dIC;TQ1BzqURr^a_3rzk%Pv@8I|F2lylW3H}U!fxp7v;P3Dc z_$T}ewp(I;4YmHh?BRHDd^iD|5O#nQ!HMA{up^ulP6j83Q@|+g7rYzZ1Mh|R!TaF@@Im+xd>B3gABB&>$Kez3N%$0e8a@M`h0np~;S2CZ_!4{> zz5-u`uff;h8}Lo|7JM7N1K)-3!S~?@@I&|!{1|=$KZT#c&*2yFOZXN18h!)6h2O#N z;Scaf_!ImY{sMo6zro+(AMj837i_omzkfrshvUKV;RJ9(*a1!iCx(;2j&M>q8JrwW z0jGph!KvXia9TJWoF2{qXM{7snc*yORyZ4+9nJygwE5p_x1FSNN#&NxBb8SwpHzOS z0#eRWF4p2}HVVpn3rQ80Dk9}7RaDAN%3aDs%2Ucq%3I1us+d%9sS;8prAkSamMSAv zR;rv-d8rCg6{RXkRhFtERaL5*l&@5EsTxv#QZ=P&N!6CBBUM+bo|L~-eW?ag0a6X6 z8c8*lY9iHCs+m-CsX(a~QbAHJrGlkGq(Y@yNrg$ZmTDtqNQFy9NVS!Ulxio{UaEss zN2yLy7O5zyXsH;f&Qe{Zx=MAE>Mqqos;5+}R4=LCQhlWQO7)ZKFEv1Fpwu9#!BRt{ z;-rR34U-x!H9~5n)F`RZQe&jXN{y2mFEv4GqSPd*$x>6Krb*b z!)M^L@HzNAd;z`)UxF{gSKzDgHTXJw1HK90f^Wlj;JffW_&)pqeh5E;AHz@Jr|>iQ zIs5{C3BQ70!*AfX@H_ZD`~m(5e}X^5U*NCsH~2gJ1O5sBg6)?5_c?D5$Aja;3E+gV z1Dps>3@3pd;iPafI60gGP6?-iQ^RTCv~W5&J)8l~2xo#b!&%_0a5gwQoCD4YJHff& z+;AQ^FPsm~4;O%)VHda{TnH`<7lB>jqOcq64tv0!uovtN`@qHE;&2JLBwPwE4VQt- 
z!sX!da0R#`TnVlWSAna-)nH$^I$Q(xgKNUI;M#B0UV3^#$B z!p-33a3I_Q4uV_4!Egv13b%s8;MQ;(*nq>~2)HdA3AcmW|F!j*w+?VexD#xFqu^*b z2JUS0zn}Q-BGpx@n^bqH9#TD}Vx@XX^_J=*)mN&YRDY=fQUj$1Nez}7A{8e!RBD*i zaH$beBc(=3jg}fCHCAez)Oe{0QWK>nNlliTA~jWNn$&cu8B#N)W=YMKnjpTf`J=kN>oCHxA04Znfk!tdbs z@CW!K{0aUHe}TWk-{9}?5BMki3$|PS-=AxHI364yP5>u_9pFT8VmJxx2q%S;!O7ti za7s88oElC8r-jqO>ER4;MmQ6k8O{P{g|org;T&*I*a^-B=Z5pZdEtC;ez*Ya47xCrbD7lqwmci02=guP&I*at2K7l%v0CE-$VX}Anr7A^;uhbzDp;Yx62xC&eq zt_J(U)!`bjA6yfz1=oh_z;)qzus>WMZU6_s4dF&`W4H<26mAALhXdgja1h)Q4u(VE zP`DKw2DgUWzy=%+N5F02NVpx`9_|2lgge0&I0}x2W8ltk7q~0j4ek#2fP2ERa4)zw z+z0Lp_k;Vx1K@%1Ab2o51dfA;!o%R<@CbM$JPIBSkAcU+)`e926!X9 z3Em8Efw#ij;O+1ZcqhCI-VN`8_rm+&{qO{X;Op=W_$GV{z75}j@51-s`|tz!A^Zq_3_pRN!q4F6@C*1Q z{0e>zzk%Pv@8I|F2lylW3H}U!fxp7v;P3Dc_$T}ewp)Sk|KWIWd^iD|5O#nQ!HMA{ zup^ulP6j83Q@|%snTeYgP}05^mi!HwZ2a8tM$+#C*s zTfjkZOE?$~fkWX|a2VVgZUY-|I2-}Dg(Km1aC^7|+!5{sTi_@-8jgWG!(HI6a5uO+ z+ym|j$HKkf-f$ndFWe9A4-bF`!h_(!@DMl-9tsbGhr=V_k?<&ZG&}|#3y*`x!xP|% z@FaLLJO!Q#PlKn!GvJxpTjTUm+&k2HT(vC3%`Tk!yn*}@F(~){006Be}linKj5G6FW7D+zW;~g!SUe) za6;GtP6Q{0lfaH}QaBl$98LkJgj2z(t+u`b!F-l74c?O$P6wxlGr$?)OmJp63!D|s z24{zJz&T+jI2W87&I9L#^TGMy0uq#{?c7xqv57-m-g1uoMxENd< zE&-Q>OTne#GH_YA99$l*09S-7!Ij}Ea8Kta7#EC4uM1AR&W^H8g2s{a5x+Rw}m6E4vP*63$(Pe z%GpR}B#*VU|IL&}DkHU@-M|3*kiq_z4z{=X+55Q+G}6TdS~~tdB)5^r$ZO;?@*4#V zXT!xPXcRIE8$}FPqp0C#xEmgZr{QJz7{!d@MhT;&QOYQ7lrhR0<&5%11*4)-$*62p zF{&EX3}2(VQNyTdy%eKX?U+ISmQL0W@aJkQ|L1C=%&W2gb2ZVngS~_QogDM;Su@bm+jc;#rO)r<61Qt(39`O7 z%F@?%Ebrxhwgdj$%l>f4u7|mv=6adyV{UXvfJ5it$QbJ*)Zccm2gJl$2H5WH2-|}Y8Wa=~6CN2I z-aaVEF)A`VG|bU5(E4`2oPJJz_SR4TLDq=)ePSc)iG%;0D39I{+ncSAp7j&UKQ6#9 zKeCt+Yj8?ZVCU4orAfF%uQ@=5_28RO=_;UxzQniuB>rPSWHxO JNKA<9{{Vv3C3^q> diff --git a/scripts/database/database.py b/scripts/database/database.py index 66169121..758fdbab 100644 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -15,10 +15,17 @@ import os.path import glob import re import json +try: + from urllib.request import urlopen # Python 3 +except 
ImportError: + from urllib2 import urlopen # Python 2 # Additional modules import pandas as pd +# Server storing a copy of the database +DATABASE_SERVER_URL = "http://www.cedricnugteren.nl/tuning/clblast.db" + # Constants VENDOR_DEFAULT = "default" DEVICETYPE_DEFAULT = "All" @@ -38,6 +45,15 @@ pd.set_option('display.width', 1000) # Database operations # ================================================================================================== +# Downloads the database and save it to disk +def DownloadDatabase(filename): + sys.stdout.write("## Downloading database from '"+DATABASE_SERVER_URL+"'...") + df = urlopen(DATABASE_SERVER_URL) + output = open(file_db,'wb') + output.write(df.read()) + output.close() + print("done") + # Loads the database from disk def LoadDatabase(filename): return pd.read_pickle(filename) @@ -221,9 +237,13 @@ if len(glob.glob(glob_json)) < 1: # The main body of the script # ================================================================================================== -# Loads the database if it exists. 
If not, a new database is initialized +# Downloads the database if a local copy is not present db_exists = os.path.isfile(file_db) -database = LoadDatabase(file_db) if db_exists else pd.DataFrame() +if not db_exists: + DownloadDatabase(file_db) + +# Loads the database from disk +database = LoadDatabase(file_db) # Loops over all JSON files in the supplied folder for file_json in glob.glob(glob_json): From 00be6f7530a16eb17f367b45d498e1e483b61b8d Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 7 Feb 2016 11:59:30 +0100 Subject: [PATCH 23/50] Added dictionary with short and long OpenCL vendor names to fix issues with Intel having multiple names --- include/internal/database.h | 11 +++++++++++ scripts/database/database.py | 28 ++++++++++++++++++++++++---- src/database.cc | 14 +++++++++++--- 3 files changed, 46 insertions(+), 7 deletions(-) diff --git a/include/internal/database.h b/include/internal/database.h index 9107f978..08e449fa 100644 --- a/include/internal/database.h +++ b/include/internal/database.h @@ -57,6 +57,17 @@ class Database { // The OpenCL device vendors static constexpr auto kDeviceVendorAll = "default"; + static constexpr auto kDeviceVendorIntel = "Intel"; + static constexpr auto kDeviceVendorAMD = "AMD"; + static constexpr auto kDeviceVendorNVIDIA = "NVIDIA"; + + // Alternative names for the above vendors + const std::unordered_map kVendorNames { + {"Intel(R) Corporation", kDeviceVendorIntel}, + {"GenuineIntel", kDeviceVendorIntel}, + {"Advanced Micro Devices, Inc.", kDeviceVendorAMD}, + {"NVIDIA Corporation", kDeviceVendorNVIDIA}, + }; // The database consists of separate database entries, stored together in a vector static const DatabaseEntry XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble; diff --git a/scripts/database/database.py b/scripts/database/database.py index 758fdbab..89fe8286 100644 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -38,6 +38,14 @@ KERNEL_ATTRIBUTES = ["precision", 
"kernel_family", "arg_m", "arg_n", "arg_k", "arg_alpha", "arg_beta"] ATTRIBUTES = DEVICE_ATTRIBUTES + DEVICETYPE_ATTRIBUTES + KERNEL_ATTRIBUTES +# OpenCL vendor names and their short name +VENDOR_NAMES = { "device_vendor": { + "GenuineIntel": "Intel", + "Intel(R) Corporation": "Intel", + "Advanced Micro Devices, Inc.": "AMD", + "NVIDIA Corporation": "NVIDIA", +}} + # Pandas options pd.set_option('display.width', 1000) @@ -91,6 +99,11 @@ def RemoveEntriesByDevice(df, devicename): def GetEntriesByField(df, field, value): return df[df[field] == value] +# Fixes the problem that some vendors use multiple different names +def SanitizeVendorNames(df): + df = df.replace(VENDOR_NAMES) + return df + # Retrieves the results with the lowest execution times def GetBestResults(df): dfbest = pd.DataFrame() @@ -175,7 +188,7 @@ def GetPrecision(family, precision): def GetDeviceVendor(vendor, devtype): if vendor == VENDOR_DEFAULT and devtype == DEVICETYPE_DEFAULT: return(" { // Default\n kDeviceType%s, \"%s\", {\n" % (devtype, vendor)) - return(" { // %s %ss\n kDeviceType%s, \"%s\", {\n" % (vendor, devtype, devtype, vendor)) + return(" { // %s %ss\n kDeviceType%s, \"%s\", {\n" % (vendor, devtype, devtype[0].upper() + devtype[1:], vendor)) # Prints the data to a C++ database def PrintData(df, outputdir): @@ -243,6 +256,7 @@ if not db_exists: DownloadDatabase(file_db) # Loads the database from disk +print("## Loading the database from disk...") database = LoadDatabase(file_db) # Loops over all JSON files in the supplied folder @@ -259,10 +273,14 @@ for file_json in glob.glob(glob_json): new_size = len(database.index) print("with "+str(new_size-old_size)+" new items") -# Stores the new database back to disk -SaveDatabase(database, file_db) + database = SanitizeVendorNames(database) + + # Stores the modified database back to disk + print("## Storing the database to disk...") + SaveDatabase(database, file_db) # Retrieves the best performing results +print("## Calculting the best results 
per device/kernel...") bests = GetBestResults(database) # Determines the defaults for other vendors and per vendor @@ -271,7 +289,9 @@ bests = ConcatenateData(bests, defaults) # Outputs the data as a C++ database path_cpp_database = os.path.join(path_clblast, "include", "internal", "database") -print("## Producing a C++ database in '"+path_cpp_database+"'") +print("## Producing a C++ database in '"+path_cpp_database+"'...") PrintData(bests, path_cpp_database) +print("## All done") + # ================================================================================================== diff --git a/src/database.cc b/src/database.cc index 7f5ac6eb..ba0a56d9 100644 --- a/src/database.cc +++ b/src/database.cc @@ -77,15 +77,23 @@ Database::Parameters Database::Search(const std::string &this_kernel, const std::string &this_vendor, const std::string &this_device, const Precision this_precision) const { - for (auto &db: database) { + // Set the short vendor name + auto this_short_vendor = this_vendor; + for (auto &combination : kVendorNames) { + if (this_vendor == combination.first) { + this_short_vendor = combination.second; + } + } + // Selects the right kernel + for (auto &db: database) { if (db.kernel == this_kernel && db.precision == this_precision) { // Searches for the right vendor and device type, or selects the default if unavailable. This // assumes that the default vendor / device type is last in the database. for (auto &vendor: db.vendors) { - if ((vendor.name == this_vendor || vendor.name == kDeviceVendorAll) && - (vendor.type == this_type || vendor.type == kDeviceTypeAll)) { + if ((vendor.name == this_short_vendor || vendor.name == kDeviceVendorAll) && + (vendor.type == this_type || vendor.type == kDeviceTypeAll)) { // Searches for the right device. If the current device is unavailable, selects the vendor // default parameters. This assumes the default is last in the database. 
From 165a94c200f753109b0011995162ab3cb97a64d6 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 7 Feb 2016 16:39:37 +0100 Subject: [PATCH 24/50] Various fixes to the database script --- scripts/database/database.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/database/database.py b/scripts/database/database.py index 89fe8286..f2d47717 100644 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -207,7 +207,7 @@ def PrintData(df, outputdir): f.write(GetDeviceVendor(vendor, devtype)) for device, dfdevice in dfdevtype.groupby(["device"]): devicename = "\"%s\"," % device - f.write(" { %-48s { " % devicename) + f.write(" { %-50s { " % devicename) # Collects the paramaters for this case and prints them parameters = [] @@ -265,6 +265,7 @@ for file_json in glob.glob(glob_json): # Loads the newly imported data sys.stdout.write("## Processing '"+file_json+"'") imported_data = ImportDataFromFile(file_json) + imported_data = SanitizeVendorNames(imported_data) # Adds the new data to the database old_size = len(database.index) @@ -273,9 +274,9 @@ for file_json in glob.glob(glob_json): new_size = len(database.index) print("with "+str(new_size-old_size)+" new items") - database = SanitizeVendorNames(database) - # Stores the modified database back to disk +# Stores the modified database back to disk +if len(glob.glob(glob_json)) >= 1: print("## Storing the database to disk...") SaveDatabase(database, file_db) From 6f4b34f8137c617b8a316bf6a84ebbe9b4872983 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 7 Feb 2016 16:41:09 +0100 Subject: [PATCH 25/50] Added tuning parameters for various devices using the new database script --- README.md | 30 ++++- include/internal/database/copy.h | 162 ++++++++++++++++++---- include/internal/database/pad.h | 164 +++++++++++++++++++---- include/internal/database/padtranspose.h | 164 +++++++++++++++++++---- include/internal/database/transpose.h | 152 +++++++++++++++++---- 
include/internal/database/xaxpy.h | 164 +++++++++++++++++++---- include/internal/database/xdot.h | 164 +++++++++++++++++++---- include/internal/database/xgemm.h | 163 ++++++++++++++++++---- include/internal/database/xgemv.h | 141 ++++++++++++++++--- 9 files changed, 1086 insertions(+), 218 deletions(-) diff --git a/README.md b/README.md index 491ce489..2add9798 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ CLBlast: The tuned OpenCL BLAS library CLBlast is a modern, lightweight, performant and tunable OpenCL BLAS library written in C++11. It is designed to leverage the full performance potential of a wide variety of OpenCL devices from different vendors, including desktop and laptop GPUs, embedded GPUs, and other accelerators. CLBlast implements BLAS routines: basic linear algebra subprograms operating on vectors and matrices. -__Note that the CLBlast library is actively being developed, and is not mature enough for production environments__. This preview-version doesn't support the less commonly used routines yet: they will be added in due time. It also lacks extensive tuning on some common OpenCL platforms: __out-of-the-box performance on some devices might be poor__. See below for more details. +__Note that the CLBlast library is actively being developed, and might not be mature enough for production environments__. This preview-version doesn't support the less commonly used routines yet: they will be added in due time. It also lacks extensive tuning on some common OpenCL platforms: __out-of-the-box performance on some devices might be poor__. See below for more details (and how to tune yourself). Why CLBlast and not clBLAS or cuBLAS? @@ -17,6 +17,7 @@ Use CLBlast instead of clBLAS: * When you care about achieving maximum performance. * When you want to be able to inspect the BLAS kernels or easily customize them to your needs. * When you run on exotic OpenCL devices which you need to tune yourself. +* When you value an organized and modern C++ codebase. 
Use CLBlast instead of cuBLAS: @@ -82,13 +83,24 @@ Using the tuners (optional) The CLBlast library will be tuned in the future for the most commonly used OpenCL devices. This pre-release of CLBlast is only tuned for a limited number of devices, in particular those with the following `CL_DEVICE_NAME` values: * NVIDIA GPUs: - - GeForce GTX480 + - GeForce GTX 480 + - GeForce GTX 680 + - GeForce GTX 750 Ti + - GeForce GTX 980 + - GeForce GTX Titan + - GeForce GTX Titan X - Tesla K20m - Tesla K40m * AMD GPUs: - Tahiti * Intel GPUs: - Iris +* Intel CPUs: + - Core i5-6200U + - Core i7-3770K + - Core i7-5930K +* Other devices: + - Intel MIC If your device is not (yet) among this list or if you want to tune CLBlast for specific parameters (e.g. rectangular matrix sizes), you should compile the library with the optional tuners: @@ -96,9 +108,19 @@ If your device is not (yet) among this list or if you want to tune CLBlast for s Note that CLBlast's tuners are based on the CLTune auto-tuning library, which has to be installed separately (version 1.7.0 or higher). CLTune is available from GitHub. -Compiling with `-DTUNERS=ON` will generate a number of tuners, each named `clblast_tuner_xxxxx`, in which `xxxxx` corresponds to a `.opencl` kernel file as found in `src/kernels`. These kernels corresponds to routines (e.g. `xgemm`) or to common pre-processing or post-processing kernels (`copy` and `transpose`). Running such a tuner will test a number of parameter-value combinations on your device and report which one gave the best performance. +Compiling with `-DTUNERS=ON` will generate a number of tuners, each named `clblast_tuner_xxxxx`, in which `xxxxx` corresponds to a `.opencl` kernel file as found in `src/kernels`. These kernels corresponds to routines (e.g. `xgemm`) or to common pre-processing or post-processing kernels (`copy` and `transpose`). Running such a tuner will test a number of parameter-value combinations on your device and report which one gave the best performance. 
Running `make alltuners` runs all tuners for all precisions in one go. You can set the default device and platform for `alltuners` by setting the `DEFAULT_DEVICE` and `DEFAULT_PLATFORM` environmental variables before running CMake. -The tuner will output a C++ database compatible line with the results, which can be added to `include/internal/database/xxxxx.h` in the appropriate section. Or, if tuning parameters already exist for your device but you believe they can be improved, this is also the place where they can be modified. If you want the found parameters to be included in future releases of CLBlast, please post the JSON output in the corresponding issue on GitHub or [email the main author](http://www.cedricnugteren.nl). +The tuners output a JSON-file with the results. The best results need to be added to `include/internal/database/xxxxx.h` in the appropriate section. However, this can be done automatically based on the JSON-data using a Python script in `scripts/database/database.py`. If you want the found parameters to be included in future releases of CLBlast, please attach the JSON files to the corresponding issue on GitHub or [email the main author](http://www.cedricnugteren.nl). + +In summary, tuning the entire library for your device can be done as follows (starting from the root of the CLBlast folder): + + mkdir build + cd build + cmake -DTUNERS=ON .. + make + make alltuners + python ../scripts/database/database.py . .. 
+ make Compiling the tests (optional) diff --git a/include/internal/database/copy.h b/include/internal/database/copy.h index 3bd85fa6..9429857c 100644 --- a/include/internal/database/copy.h +++ b/include/internal/database/copy.h @@ -16,21 +16,48 @@ namespace clblast { const Database::DatabaseEntry Database::CopySingle = { "Copy", Precision::kSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",2} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, 
{"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } }, + { "GeForce GTX 680", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } }, + { "GeForce GTX 750 Ti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "GeForce GTX 980", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "GeForce GTX TITAN", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",4} } }, + { "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } }, + { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, } @@ -40,21 +67,46 @@ const Database::DatabaseEntry Database::CopySingle = { const Database::DatabaseEntry Database::CopyComplexSingle = { "Copy", Precision::kComplexSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",16}, 
{"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "GeForce GTX 750 Ti", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "GeForce GTX 980", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "GeForce GTX TITAN X", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",4} } }, + { "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, } @@ 
-64,15 +116,42 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { const Database::DatabaseEntry Database::CopyDouble = { "Copy", Precision::kDouble, { - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "GeForce GTX 680", { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "GeForce GTX 750 Ti", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "GeForce GTX 980", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "GeForce GTX TITAN", { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",2} } }, + { "GeForce GTX TITAN X", { 
{"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, } @@ -82,15 +161,42 @@ const Database::DatabaseEntry Database::CopyDouble = { const Database::DatabaseEntry Database::CopyComplexDouble = { "Copy", Precision::kComplexDouble, { - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, + } + }, + { 
// NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "GeForce GTX 680", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "GeForce GTX 750 Ti", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "GeForce GTX 980", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "GeForce GTX TITAN", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "GeForce GTX TITAN X", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, } diff --git a/include/internal/database/pad.h b/include/internal/database/pad.h index d833a934..373f9641 100644 --- a/include/internal/database/pad.h +++ b/include/internal/database/pad.h @@ -16,21 +16,48 @@ namespace clblast { const Database::DatabaseEntry Database::PadSingle = { "Pad", Precision::kSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, 
{"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",4} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, + { "GeForce GTX 680", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "GeForce GTX 750 Ti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "GeForce GTX 980", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "GeForce GTX TITAN", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, 
{"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } @@ -40,21 +67,48 @@ const Database::DatabaseEntry Database::PadSingle = { const Database::DatabaseEntry Database::PadComplexSingle = { "Pad", Precision::kComplexSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, - { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",2} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, 
{"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "GeForce GTX 680", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "GeForce GTX 750 Ti", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "GeForce GTX 980", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "GeForce GTX TITAN", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } @@ -64,15 +118,42 @@ const Database::DatabaseEntry Database::PadComplexSingle = { const Database::DatabaseEntry Database::PadDouble = { "Pad", Precision::kDouble, { - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, 
{"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "GeForce GTX 680", { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "GeForce GTX 750 Ti", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "GeForce GTX 980", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "GeForce GTX TITAN", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } @@ -82,15 +163,42 @@ const Database::DatabaseEntry Database::PadDouble = { const Database::DatabaseEntry Database::PadComplexDouble = { "Pad", Precision::kComplexDouble, { - { // NVIDIA Corporation GPUs - 
kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PAD_DIMX",16}, {"PAD_DIMY",32}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "GeForce GTX 680", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "GeForce GTX 750 Ti", { {"PAD_DIMX",32}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "GeForce GTX 980", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "GeForce GTX TITAN", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "Tesla K20m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, 
{"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, } diff --git a/include/internal/database/padtranspose.h b/include/internal/database/padtranspose.h index dacc693f..99d4f32a 100644 --- a/include/internal/database/padtranspose.h +++ b/include/internal/database/padtranspose.h @@ -16,21 +16,48 @@ namespace clblast { const Database::DatabaseEntry Database::PadtransposeSingle = { "Padtranspose", Precision::kSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + } + }, + { // 
Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "GeForce GTX 680", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "GeForce GTX 750 Ti", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, + { "GeForce GTX 980", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "GeForce GTX TITAN", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, + { "Tesla K20m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, } }, } @@ -40,21 +67,48 @@ const Database::DatabaseEntry Database::PadtransposeSingle = { const Database::DatabaseEntry Database::PadtransposeComplexSingle = { "Padtranspose", Precision::kComplexSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, 
{"PADTRA_WPT",1} } }, - { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "GeForce GTX 680", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "GeForce GTX 750 Ti", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "GeForce GTX 980", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "GeForce GTX TITAN", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, + { "Tesla K20m", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, 
{"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, } }, } @@ -64,15 +118,42 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { const Database::DatabaseEntry Database::PadtransposeDouble = { "Padtranspose", Precision::kDouble, { - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, - { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",8} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "GeForce GTX 680", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "GeForce GTX 750 Ti", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } }, + { "GeForce GTX 980", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, + { "GeForce GTX TITAN", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, + { "Tesla K20m", { 
{"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, } }, } @@ -82,15 +163,42 @@ const Database::DatabaseEntry Database::PadtransposeDouble = { const Database::DatabaseEntry Database::PadtransposeComplexDouble = { "Padtranspose", Precision::kComplexDouble, { - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, - { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "GeForce GTX 680", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, 
+ { "GeForce GTX 750 Ti", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "GeForce GTX 980", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "GeForce GTX TITAN", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } }, + { "Tesla K20m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, } }, } diff --git a/include/internal/database/transpose.h b/include/internal/database/transpose.h index 46a38bc2..8eee2e5d 100644 --- a/include/internal/database/transpose.h +++ b/include/internal/database/transpose.h @@ -16,21 +16,48 @@ namespace clblast { const Database::DatabaseEntry Database::TransposeSingle = { "Transpose", Precision::kSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, - { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, - { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, + { "Intel(R) 
Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "GeForce GTX 680", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, + { "GeForce GTX 750 Ti", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "GeForce GTX 980", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "GeForce GTX TITAN", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "GeForce GTX TITAN X", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "Tesla K20m", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "Tesla K40m", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, } @@ -40,21 +67,42 @@ const Database::DatabaseEntry 
Database::TransposeSingle = { const Database::DatabaseEntry Database::TransposeComplexSingle = { "Transpose", Precision::kComplexSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, - { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, - { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "GeForce GTX 680", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "GeForce GTX 750 Ti", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "GeForce GTX 980", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "GeForce GTX TITAN", { 
{"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, } @@ -64,15 +112,42 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { const Database::DatabaseEntry Database::TransposeDouble = { "Transpose", Precision::kDouble, { - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, - { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { 
"default", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "GeForce GTX 680", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, + { "GeForce GTX 750 Ti", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "GeForce GTX 980", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "GeForce GTX TITAN", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, } @@ -82,15 +157,36 @@ const Database::DatabaseEntry Database::TransposeDouble = { const Database::DatabaseEntry Database::TransposeComplexDouble = { "Transpose", Precision::kComplexDouble, { - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, - { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, 
{"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "GeForce GTX 680", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "GeForce GTX 750 Ti", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "GeForce GTX 980", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "GeForce GTX TITAN", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, } }, } diff --git a/include/internal/database/xaxpy.h b/include/internal/database/xaxpy.h index 783e142d..bcaf80cd 100644 --- a/include/internal/database/xaxpy.h +++ b/include/internal/database/xaxpy.h @@ -16,21 +16,48 @@ namespace clblast { const Database::DatabaseEntry Database::XaxpySingle = { "Xaxpy", Precision::kSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { 
"Tahiti", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",512}, {"WPT",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"VW",2}, {"WGS",1024}, {"WPT",2} } }, + { "default", { {"VW",2}, {"WGS",1024}, {"WPT",2} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"VW",4}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX 680", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, + { "GeForce GTX 980", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, + { "GeForce GTX TITAN", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, + { "GeForce GTX TITAN X", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "Tesla K20m", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, + { "Tesla K40m", { {"VW",4}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, } @@ -40,21 +67,48 @@ const Database::DatabaseEntry Database::XaxpySingle = { const Database::DatabaseEntry Database::XaxpyComplexSingle = { "Xaxpy", Precision::kComplexSingle, { - 
{ // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",1}, {"WGS",1024}, {"WPT",2} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",2}, {"WGS",1024}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, + { "GeForce GTX 680", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, + { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS",512}, {"WPT",1} } }, + { "GeForce GTX 980", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX TITAN", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, + { "GeForce GTX TITAN X", { {"VW",1}, {"WGS",512}, {"WPT",1} } }, + { "Tesla K20m", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { "Tesla K40m", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",1}, 
{"WGS",64}, {"WPT",1} } }, } }, } @@ -64,15 +118,42 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { const Database::DatabaseEntry Database::XaxpyDouble = { "Xaxpy", Precision::kDouble, { - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",8}, {"WGS",64}, {"WPT",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",8}, {"WGS",2048}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"VW",2}, {"WGS",512}, {"WPT",1} } }, + { "default", { {"VW",2}, {"WGS",512}, {"WPT",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX 680", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX 980", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, + { "GeForce GTX TITAN", { {"VW",2}, {"WGS",1024}, {"WPT",1} } }, + { "GeForce GTX TITAN X", { {"VW",1}, {"WGS",512}, {"WPT",1} } }, + { "Tesla K20m", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + { "Tesla K40m", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, } @@ -82,15 +163,42 @@ const Database::DatabaseEntry Database::XaxpyDouble = { const 
Database::DatabaseEntry Database::XaxpyComplexDouble = { "Xaxpy", Precision::kComplexDouble, { - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",8}, {"WGS",128}, {"WPT",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",8}, {"WGS",512}, {"WPT",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, + { "GeForce GTX 680", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS",256}, {"WPT",2} } }, + { "GeForce GTX 980", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, + { "GeForce GTX TITAN", { {"VW",1}, {"WGS",64}, {"WPT",4} } }, + { "GeForce GTX TITAN X", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, + { "Tesla K20m", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "Tesla K40m", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, } diff --git a/include/internal/database/xdot.h b/include/internal/database/xdot.h index 66a5231e..a80398d7 100644 --- a/include/internal/database/xdot.h +++ b/include/internal/database/xdot.h @@ 
-16,21 +16,48 @@ namespace clblast { const Database::DatabaseEntry Database::XdotSingle = { "Xdot", Precision::kSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"VW",1}, {"WGS1",512}, {"WGS2",32} } }, - { "default", { {"VW",1}, {"WGS1",512}, {"WGS2",32} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"VW",1}, {"WGS1",256}, {"WGS2",256} } }, + { "default", { {"VW",1}, {"WGS1",256}, {"WGS2",256} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, - { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",1}, {"WGS1",512}, {"WGS2",32} } }, + { "default", { {"VW",1}, {"WGS1",512}, {"WGS2",32} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"VW",1}, {"WGS1",256}, {"WGS2",128} } }, + { "GeForce GTX 680", { {"VW",1}, {"WGS1",128}, {"WGS2",128} } }, + { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + { "GeForce GTX 980", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + { "GeForce GTX TITAN", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "GeForce GTX TITAN X", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + { "Tesla K20m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "Tesla K40m", { {"VW",1}, 
{"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",128} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",32} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",32} } }, } }, } @@ -40,21 +67,48 @@ const Database::DatabaseEntry Database::XdotSingle = { const Database::DatabaseEntry Database::XdotComplexSingle = { "Xdot", Precision::kComplexSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, - { "default", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"VW",1}, {"WGS1",64}, {"WGS2",256} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",256} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, - { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"VW",1}, {"WGS1",512}, {"WGS2",512} } }, + { "GeForce GTX 680", { {"VW",1}, {"WGS1",256}, {"WGS2",32} } }, + { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS1",128}, {"WGS2",32} } 
}, + { "GeForce GTX 980", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + { "GeForce GTX TITAN", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "GeForce GTX TITAN X", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "Tesla K20m", { {"VW",1}, {"WGS1",256}, {"WGS2",512} } }, + { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",32} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",32} } }, } }, } @@ -64,15 +118,42 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { const Database::DatabaseEntry Database::XdotDouble = { "Xdot", Precision::kDouble, { - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, - { "default", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"VW",1}, {"WGS1",64}, {"WGS2",256} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",256} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS1",512}, {"WGS2",512} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS1",1024}, {"WGS2",512} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",512} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + { "GeForce GTX 680", { {"VW",1}, {"WGS1",64}, {"WGS2",128} } }, + { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + { "GeForce GTX 980", { 
{"VW",1}, {"WGS1",32}, {"WGS2",512} } }, + { "GeForce GTX TITAN", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "GeForce GTX TITAN X", { {"VW",1}, {"WGS1",128}, {"WGS2",128} } }, + { "Tesla K20m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "Tesla K40m", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",32}, {"WGS2",128} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",32}, {"WGS2",128} } }, } }, } @@ -82,15 +163,42 @@ const Database::DatabaseEntry Database::XdotDouble = { const Database::DatabaseEntry Database::XdotComplexDouble = { "Xdot", Precision::kComplexDouble, { - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, - { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"VW",1}, {"WGS1",64}, {"WGS2",256} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",256} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",1024} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"VW",1}, {"WGS1",32}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",32}, {"WGS2",1024} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"VW",1}, {"WGS1",512}, {"WGS2",512} } }, + { "GeForce GTX 680", { {"VW",1}, {"WGS1",256}, {"WGS2",64} } }, + { "GeForce GTX 750 Ti", { {"VW",1}, {"WGS1",32}, {"WGS2",64} } }, + { "GeForce GTX 980", { {"VW",1}, {"WGS1",32}, 
{"WGS2",128} } }, + { "GeForce GTX TITAN", { {"VW",1}, {"WGS1",128}, {"WGS2",512} } }, + { "GeForce GTX TITAN X", { {"VW",1}, {"WGS1",128}, {"WGS2",128} } }, + { "Tesla K20m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "Tesla K40m", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",32}, {"WGS2",64} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",32}, {"WGS2",64} } }, } }, } diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h index 9fbd8fbb..1197bc0a 100644 --- a/include/internal/database/xgemm.h +++ b/include/internal/database/xgemm.h @@ -16,21 +16,48 @@ namespace clblast { const Database::DatabaseEntry Database::XgemmSingle = { "Xgemm", Precision::kSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, - { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",1} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",1} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, - { "default", { {"KWG",16}, {"KWI",8}, 
{"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",2} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",8} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",32}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, + } + }, + { // NVIDIA 
GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } }, + { "GeForce GTX 680", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",2} } }, + { "GeForce GTX 750 Ti", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",4} } }, + { "GeForce GTX 980", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",8} } }, + { "GeForce GTX TITAN", { {"KWG",16}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",2} } }, + { "GeForce GTX TITAN X", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",8} } }, + { "Tesla K20m", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, + { "Tesla K40m", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, 
{"STRN",0}, {"VWM",2}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, } }, } @@ -40,21 +67,48 @@ const Database::DatabaseEntry Database::XgemmSingle = { const Database::DatabaseEntry Database::XgemmComplexSingle = { "Xgemm", Precision::kComplexSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, - { "default", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",2}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",2}, {"VWN",1} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, - { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, + { "Intel(R) Core(TM) 
i7-3770 CPU @ 3.40GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",4} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } }, + { "GeForce GTX 680", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",2} } }, + { 
"GeForce GTX 750 Ti", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, + { "GeForce GTX 980", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",1} } }, + { "GeForce GTX TITAN", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "GeForce GTX TITAN X", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",4} } }, + { "Tesla K20m", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, + { "Tesla K40m", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, } }, } @@ -64,15 +118,42 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = { const Database::DatabaseEntry Database::XgemmDouble = { "Xgemm", Precision::kDouble, { - { // NVIDIA Corporation GPUs - 
kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, - { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",4} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",4} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",2}, {"VWN",8} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",8} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, 
{"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, + { "default", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, + { "GeForce GTX 680", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, + { "GeForce GTX 750 Ti", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",1} } }, + { "GeForce GTX 980", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",32}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, + { "GeForce GTX TITAN", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } }, + { "GeForce GTX TITAN X", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "Tesla K20m", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "Tesla K40m", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, 
{"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, } @@ -82,15 +163,41 @@ const Database::DatabaseEntry Database::XgemmDouble = { const Database::DatabaseEntry Database::XgemmComplexDouble = { "Xgemm", Precision::kComplexDouble, { - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, - { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, 
{"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",8} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "GeForce GTX 680", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "GeForce GTX 750 Ti", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, + { "GeForce GTX 980", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } }, + { "GeForce GTX TITAN X", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, 
{"VWM",1}, {"VWN",1} } }, + { "Tesla K20m", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "Tesla K40m", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, } diff --git a/include/internal/database/xgemv.h b/include/internal/database/xgemv.h index ae9fbf30..37d859e2 100644 --- a/include/internal/database/xgemv.h +++ b/include/internal/database/xgemv.h @@ -16,21 +16,47 @@ namespace clblast { const Database::DatabaseEntry Database::XgemvSingle = { "Xgemv", Precision::kSingle, { - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Iris", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",8} } }, - { "default", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",8} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { 
"Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, - { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",1}, {"VW2",4}, {"WGS2",128}, {"WPT2",4}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4}, {"VW2",1}, {"WGS2",64}, {"WPT2",4}, {"VW3",2}, {"WGS3",64}, {"WPT3",4} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",4}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",8} } }, + { "default", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",8} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, + { "GeForce GTX 680", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",2}, {"WGS3",128}, {"WPT3",2} } }, + { "GeForce GTX 750 Ti", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",4}, {"WGS3",128}, {"WPT3",4} } }, + { "GeForce GTX 980", { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, + { "GeForce GTX TITAN", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, 
{"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, + { "GeForce GTX TITAN X", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, + { "Tesla K20m", { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, + { "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, } @@ -40,15 +66,43 @@ const Database::DatabaseEntry Database::XgemvSingle = { const Database::DatabaseEntry Database::XgemvComplexSingle = { "Xgemv", Precision::kComplexSingle, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4}, {"VW2",4}, {"WGS2",64}, {"WPT2",4}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",2}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Iris", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, 
{"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "Iris", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "GeForce GTX 680", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "GeForce GTX 750 Ti", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1} } }, + { "GeForce GTX TITAN", { {"WGS1",256}, {"WPT1",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, } @@ -58,15 +112,41 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = { const Database::DatabaseEntry Database::XgemvDouble = { "Xgemv", Precision::kDouble, { - { // NVIDIA Corporation GPUs - kDeviceTypeGPU, "NVIDIA Corporation", { - { "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, - { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, 
{"WPT3",1} } }, + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",2}, {"VW2",4}, {"WGS2",128}, {"WPT2",4}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4}, {"VW2",1}, {"WGS2",64}, {"WPT2",4}, {"VW3",1}, {"WGS3",64}, {"WPT3",2} } }, + { "default", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",64}, {"WPT2",4}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "GeForce GTX 680", { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",2}, {"WGS3",128}, {"WPT3",2} } }, + { "GeForce GTX 750 Ti", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",2}, {"WGS3",256}, {"WPT3",2} } }, + { "GeForce GTX 980", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "GeForce GTX TITAN", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, + { "GeForce GTX TITAN X", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, + { "Tesla K20m", { {"WGS1",256}, {"WPT1",1}, 
{"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, } @@ -76,9 +156,34 @@ const Database::DatabaseEntry Database::XgemvDouble = { const Database::DatabaseEntry Database::XgemvComplexDouble = { "Xgemv", Precision::kComplexDouble, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Tahiti", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",64}, {"WPT1",1}, {"VW2",2}, {"WGS2",64}, {"WPT2",4}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { {"WGS1",64}, {"WPT1",4}, {"VW2",4}, {"WGS2",64}, {"WPT2",4}, {"VW3",2}, {"WGS3",256}, {"WPT3",2} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",2}, {"WGS2",64}, {"WPT2",4}, {"VW3",1}, {"WGS3",256}, {"WPT3",1} } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"WGS1",64}, 
{"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + } + }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, } From 38c56bbde2ed108d47bd058ba239725b3396475d Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 8 Feb 2016 19:43:34 +0100 Subject: [PATCH 26/50] Split-up the XGEMV kernel in two parts --- src/kernels/level2/xgemv.opencl | 259 +----------------------- src/kernels/level2/xgemv_fast.opencl | 288 +++++++++++++++++++++++++++ src/routines/level2/xgemv.cc | 1 + src/tuning/xgemv.cc | 1 + 4 files changed, 292 insertions(+), 257 deletions(-) create mode 100644 src/kernels/level2/xgemv_fast.opencl diff --git a/src/kernels/level2/xgemv.opencl b/src/kernels/level2/xgemv.opencl index 908d7d13..30b131b4 100644 --- a/src/kernels/level2/xgemv.opencl +++ b/src/kernels/level2/xgemv.opencl @@ -7,7 +7,7 @@ // Author(s): // Cedric Nugteren // -// This file contains the Xgemv kernel for matrix-vector multiplication. +// This file contains the Xgemv kernel (generic version) for matrix-vector multiplication. 
// // ================================================================================================= @@ -31,55 +31,7 @@ R"( #define UNROLL1 32 // Unroll factor (must be a divider of WGS1) #endif -// 2: For the fast version -#ifndef WGS2 - #define WGS2 64 // The local work-group size -#endif -#ifndef WPT2 - #define WPT2 1 // The amount of work-per-thread -#endif -#ifndef VW2 - #define VW2 1 // Vector width of matrix A loads -#endif - -// 3: For the fast rotated version -#ifndef WGS3 - #define WGS3 64 // The local work-group size -#endif -#ifndef WPT3 - #define WPT3 1 // The amount of work-per-thread -#endif -#ifndef VW3 - #define VW3 1 // Vector width of matrix A loads -#endif - -// ================================================================================================= - -// Data-widths for the 'fast' kernel -#if VW2 == 1 - typedef real realVF; -#elif VW2 == 2 - typedef real2 realVF; -#elif VW2 == 4 - typedef real4 realVF; -#elif VW2 == 8 - typedef real8 realVF; -#elif VW2 == 16 - typedef real16 realVF; -#endif - -// Data-widths for the 'fast' kernel with rotated matrix -#if VW3 == 1 - typedef real realVFR; -#elif VW3 == 2 - typedef real2 realVFR; -#elif VW3 == 4 - typedef real4 realVFR; -#elif VW3 == 8 - typedef real8 realVFR; -#elif VW3 == 16 - typedef real16 realVFR; -#endif +// 2 and 3: For the fast versions, see 'xgemv_fast.opencl' // ================================================================================================= @@ -255,18 +207,6 @@ inline real LoadMatrixA(const __global real* restrict agm, const int x, const in return result; } -// Loads a vector input value (1/2) -inline realVF LoadMatrixAVF(const __global realVF* restrict agm, const int x, const int y, - const int a_ld) { - return agm[a_ld*y + x]; -} - -// Loads a vector input value (2/2): as before, but different data-type -inline realVFR LoadMatrixAVFR(const __global realVFR* restrict agm, const int x, const int y, - const int a_ld) { - return agm[a_ld*y + x]; -} - // 
================================================================================================= // Full version of the kernel @@ -371,202 +311,7 @@ __kernel void Xgemv(const int m, const int n, const real alpha, const real beta, // ================================================================================================= -// Faster version of the kernel, assuming that: -// --> 'm' and 'n' are multiples of WGS2 -// --> 'a_offset' is 0 -// --> 'a_ld' is a multiple of VW2 -// --> 'a_rotated' is 0 -// --> 'do_conjugate' is 0 -__attribute__((reqd_work_group_size(WGS2, 1, 1))) -__kernel void XgemvFast(const int m, const int n, const real alpha, const real beta, - const int a_rotated, - const __global realVF* restrict agm, const int a_offset, const int a_ld, - const __global real* restrict xgm, const int x_offset, const int x_inc, - __global real* ygm, const int y_offset, const int y_inc, - const int do_conjugate, const int parameter, - const int kl, const int ku) { - // Local memory for the vector X - __local real xlm[WGS2]; - - // Initializes the accumulation register - real acc[WPT2]; - #pragma unroll - for (int w=0; w 'm' and 'n' are multiples of WGS3 -// --> 'a_offset' is 0 -// --> 'a_ld' is a multiple of VW3 -// --> 'a_rotated' is 1 -// --> 'do_conjugate' is 0 -__attribute__((reqd_work_group_size(WGS3, 1, 1))) -__kernel void XgemvFastRot(const int m, const int n, const real alpha, const real beta, - const int a_rotated, - const __global realVFR* restrict agm, const int a_offset, const int a_ld, - const __global real* restrict xgm, const int x_offset, const int x_inc, - __global real* ygm, const int y_offset, const int y_inc, - const int do_conjugate, const int parameter, - const int kl, const int ku) { - // Local memory for the vector X - __local real xlm[WGS3]; - - // Initializes the accumulation register - real acc[WPT3]; - #pragma unroll - for (int w=0; w +// +// This file contains the Xgemv kernel (fast versions) for matrix-vector multiplication. 
+// +// ================================================================================================= + +// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string +// literal). Comment-out this line for syntax-highlighting when developing. +R"( + +// ================================================================================================= + +// Parameters set by the tuner or by the database. Here they are given a basic default value in case +// this kernel file is used outside of the CLBlast library. + +// 1: For the full version, see 'xgemv.opencl' + +// 2: For the fast version +#ifndef WGS2 + #define WGS2 64 // The local work-group size +#endif +#ifndef WPT2 + #define WPT2 1 // The amount of work-per-thread +#endif +#ifndef VW2 + #define VW2 1 // Vector width of matrix A loads +#endif + +// 3: For the fast rotated version +#ifndef WGS3 + #define WGS3 64 // The local work-group size +#endif +#ifndef WPT3 + #define WPT3 1 // The amount of work-per-thread +#endif +#ifndef VW3 + #define VW3 1 // Vector width of matrix A loads +#endif + +// ================================================================================================= + +// Data-widths for the 'fast' kernel +#if VW2 == 1 + typedef real realVF; +#elif VW2 == 2 + typedef real2 realVF; +#elif VW2 == 4 + typedef real4 realVF; +#elif VW2 == 8 + typedef real8 realVF; +#elif VW2 == 16 + typedef real16 realVF; +#endif + +// Data-widths for the 'fast' kernel with rotated matrix +#if VW3 == 1 + typedef real realVFR; +#elif VW3 == 2 + typedef real2 realVFR; +#elif VW3 == 4 + typedef real4 realVFR; +#elif VW3 == 8 + typedef real8 realVFR; +#elif VW3 == 16 + typedef real16 realVFR; +#endif + +// ================================================================================================= + +// Loads a vector input value (1/2) +inline realVF LoadMatrixAVF(const __global realVF* restrict agm, const int x, const int y, + const int a_ld) { + return agm[a_ld*y + 
x]; +} + +// Loads a vector input value (2/2): as before, but different data-type +inline realVFR LoadMatrixAVFR(const __global realVFR* restrict agm, const int x, const int y, + const int a_ld) { + return agm[a_ld*y + x]; +} + +// ================================================================================================= + +// Faster version of the kernel, assuming that: +// --> 'm' and 'n' are multiples of WGS2 +// --> 'a_offset' is 0 +// --> 'a_ld' is a multiple of VW2 +// --> 'a_rotated' is 0 +// --> 'do_conjugate' is 0 +__attribute__((reqd_work_group_size(WGS2, 1, 1))) +__kernel void XgemvFast(const int m, const int n, const real alpha, const real beta, + const int a_rotated, + const __global realVF* restrict agm, const int a_offset, const int a_ld, + const __global real* restrict xgm, const int x_offset, const int x_inc, + __global real* ygm, const int y_offset, const int y_inc, + const int do_conjugate, const int parameter, + const int kl, const int ku) { + // Local memory for the vector X + __local real xlm[WGS2]; + + // Initializes the accumulation register + real acc[WPT2]; + #pragma unroll + for (int w=0; w 'm' and 'n' are multiples of WGS3 +// --> 'a_offset' is 0 +// --> 'a_ld' is a multiple of VW3 +// --> 'a_rotated' is 1 +// --> 'do_conjugate' is 0 +__attribute__((reqd_work_group_size(WGS3, 1, 1))) +__kernel void XgemvFastRot(const int m, const int n, const real alpha, const real beta, + const int a_rotated, + const __global realVFR* restrict agm, const int a_offset, const int a_ld, + const __global real* restrict xgm, const int x_offset, const int x_inc, + __global real* ygm, const int y_offset, const int y_inc, + const int do_conjugate, const int parameter, + const int kl, const int ku) { + // Local memory for the vector X + __local real xlm[WGS3]; + + // Initializes the accumulation register + real acc[WPT3]; + #pragma unroll + for (int w=0; w::Xgemv(Queue &queue, Event &event, const std::string &name): Routine(queue, event, name, {"Pad", 
"Xgemv"}, precision_) { source_string_ = #include "../../kernels/level2/xgemv.opencl" + #include "../../kernels/level2/xgemv_fast.opencl" ; } diff --git a/src/tuning/xgemv.cc b/src/tuning/xgemv.cc index 9861fb2b..43369c3b 100644 --- a/src/tuning/xgemv.cc +++ b/src/tuning/xgemv.cc @@ -35,6 +35,7 @@ class TuneXgemv { return #include "../src/kernels/common.opencl" #include "../src/kernels/level2/xgemv.opencl" + #include "../src/kernels/level2/xgemv_fast.opencl" ; } From bf84463ab20f2f39071719fad9bd28a6bb13fc24 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 8 Feb 2016 20:06:02 +0100 Subject: [PATCH 27/50] Separated the GEMM kernel in two parts to reduce string length for MSVC --- src/kernels/level3/xgemm_part1.opencl | 329 ++++++++++++++++++ .../{xgemm.opencl => xgemm_part2.opencl} | 306 +--------------- src/routines/level3/xgemm.cc | 3 +- src/routines/level3/xher2k.cc | 3 +- src/routines/level3/xherk.cc | 3 +- src/routines/level3/xsyr2k.cc | 3 +- src/routines/level3/xsyrk.cc | 3 +- src/tuning/xgemm.cc | 3 +- 8 files changed, 342 insertions(+), 311 deletions(-) create mode 100644 src/kernels/level3/xgemm_part1.opencl rename src/kernels/level3/{xgemm.opencl => xgemm_part2.opencl} (61%) diff --git a/src/kernels/level3/xgemm_part1.opencl b/src/kernels/level3/xgemm_part1.opencl new file mode 100644 index 00000000..4cb0585b --- /dev/null +++ b/src/kernels/level3/xgemm_part1.opencl @@ -0,0 +1,329 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file contains an optimized matrix-multiplication kernel according to the paper by Matsumoto +// et al. and the tutorial on http://www.cedricnugteren.nl/tutorial.php. 
It is fully configurable +// (and tunable!) using more or less the same parameters/naming conventions as in the paper. It +// supports single and double precision (SGEMM/DGEMM) through a pre-processor define. +// +// Matrices are accessed as follows: +// A: [k*M + m], with 'k' ranging from 0:K and 'm' from 0:M (m,k,m) +// B: [k*N + n], with 'k' ranging from 0:K and 'n' from 0:N (n,k,n) +// C: [n*M + m], with 'n' ranging from 0:N and 'm' from 0:M (m,n,m) +// +// Or as an image (assuming column-major) +// K +// o-------o +// | | +// N | [B^T] | +// | | +// o-------o +// K N +// o-------o o-----o +// M | [A] | M | [C] | +// | | | | +// o-------o o-----o +// +// +// This kernel is seperated into two files. This is part 1 out of 2, +// +// ================================================================================================= + +// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string +// literal). Comment-out this line for syntax-highlighting when developing. +R"( + +// ================================================================================================= + +// Parameters set by the tuner or by the database. Here they are given a basic default value in case +// this kernel file is used outside of the CLBlast library. +#ifndef MWG + #define MWG 8 // Tile-size in dimension M (e.g. 64, 128) +#endif +#ifndef NWG + #define NWG 8 // Tile-size in dimension N (e.g. 64, 128) +#endif +#ifndef KWG + #define KWG 8 // Tile-size in dimension K (e.g. 8, 16) +#endif +#ifndef MDIMC + #define MDIMC 8 // Threads per workgroup in M-dimension (e.g. 8, 16, 32) +#endif +#ifndef NDIMC + #define NDIMC 8 // Threads per workgroup in N-dimension (e.g. 
8, 16, 32) +#endif +#ifndef MDIMA + #define MDIMA 8 // Re-shaped tile dimension of matrix A: KDIMA * MDIMA +#endif +#ifndef NDIMB + #define NDIMB 8 // Re-shaped tile dimension of matrix B: KDIMB * NDIMB +#endif +#ifndef KWI + #define KWI 1 // Unroll factor of the KWG loop (smaller or equal than KWG) +#endif +#ifndef VWM + #define VWM 1 // Vector width of matrices A and C +#endif +#ifndef VWN + #define VWN 1 // Vector width of matrix B +#endif +#ifndef STRM + #define STRM 0 // Use strided access within a thread in the M-dimension (1) or not (0) +#endif +#ifndef STRN + #define STRN 0 // Use strided access within a thread in the N-dimension (1) or not (0) +#endif +#ifndef SA + #define SA 0 // Use local/shared memory to cache matrix A (1) or not (0) +#endif +#ifndef SB + #define SB 0 // Use local/shared memory to cache matrix B (1) or not (0) +#endif + +// Helper parameters based on the above tuning parameters +#define MWI (MWG/MDIMC) // Work per work-item (M-dimension) +#define NWI (NWG/NDIMC) // Work per work-item (N-dimension) +#define KDIMA ((MDIMC*NDIMC)/(MDIMA)) // Re-shaped tile dimension of matrix A: KDIMA * MDIMA +#define KDIMB ((MDIMC*NDIMC)/(NDIMB)) // Re-shaped tile dimension of matrix B: KDIMB * NDIMB +#define MWA (MWG/MDIMA) // Amount of loads-per-thread for matrix A (M-dimension) +#define KWA (KWG/KDIMA) // Amount of loads-per-thread for matrix A (K-dimension) +#define KWB (KWG/KDIMB) // Amount of loads-per-thread for matrix B (K-dimension) +#define NWB (NWG/NDIMB) // Amount of loads-per-thread for matrix B (N-dimension) + +// Settings +#define USE_VECTOR_MAD 0 // Unroll (0) or don't (1) unroll the vector MAD manually + +// ================================================================================================= + +// Data-widths in dimension M +#if VWM == 1 + typedef real realM; +#elif VWM == 2 + typedef real2 realM; +#elif VWM == 4 + typedef real4 realM; +#elif VWM == 8 + typedef real8 realM; +#elif VWM == 16 + typedef real16 realM; +#endif + 
+// Data-widths in dimension N +#if VWN == 1 + typedef real realN; +#elif VWN == 2 + typedef real2 realN; +#elif VWN == 4 + typedef real4 realN; +#elif VWN == 8 + typedef real8 realN; +#elif VWN == 16 + typedef real16 realN; +#endif + +// ================================================================================================= + +// Initializes the accumulation registers to zero +inline void InitAccRegisters(realM cpm[NWI][MWI/VWM]) { + #pragma unroll + for (int mi=0; mi // -// This file contains an optimized matrix-multiplication kernel according to the paper by Matsumoto -// et al. and the tutorial on http://www.cedricnugteren.nl/tutorial.php. It is fully configurable -// (and tunable!) using more or less the same parameters/naming conventions as in the paper. It -// supports single and double precision (SGEMM/DGEMM) through a pre-processor define. -// -// Matrices are accessed as follows: -// A: [k*M + m], with 'k' ranging from 0:K and 'm' from 0:M (m,k,m) -// B: [k*N + n], with 'k' ranging from 0:K and 'n' from 0:N (n,k,n) -// C: [n*M + m], with 'n' ranging from 0:N and 'm' from 0:M (m,n,m) -// -// Or as an image (assuming column-major) -// K -// o-------o -// | | -// N | [B^T] | -// | | -// o-------o -// K N -// o-------o o-----o -// M | [A] | M | [C] | -// | | | | -// o-------o o-----o -// +// This is part 2 of 2 of the GEMM kernel. See part 1 for more information. // // ================================================================================================= @@ -39,288 +17,6 @@ R"( // ================================================================================================= -// Parameters set by the tuner or by the database. Here they are given a basic default value in case -// this kernel file is used outside of the CLBlast library. -#ifndef MWG - #define MWG 8 // Tile-size in dimension M (e.g. 64, 128) -#endif -#ifndef NWG - #define NWG 8 // Tile-size in dimension N (e.g. 
64, 128) -#endif -#ifndef KWG - #define KWG 8 // Tile-size in dimension K (e.g. 8, 16) -#endif -#ifndef MDIMC - #define MDIMC 8 // Threads per workgroup in M-dimension (e.g. 8, 16, 32) -#endif -#ifndef NDIMC - #define NDIMC 8 // Threads per workgroup in N-dimension (e.g. 8, 16, 32) -#endif -#ifndef MDIMA - #define MDIMA 8 // Re-shaped tile dimension of matrix A: KDIMA * MDIMA -#endif -#ifndef NDIMB - #define NDIMB 8 // Re-shaped tile dimension of matrix B: KDIMB * NDIMB -#endif -#ifndef KWI - #define KWI 1 // Unroll factor of the KWG loop (smaller or equal than KWG) -#endif -#ifndef VWM - #define VWM 1 // Vector width of matrices A and C -#endif -#ifndef VWN - #define VWN 1 // Vector width of matrix B -#endif -#ifndef STRM - #define STRM 0 // Use strided access within a thread in the M-dimension (1) or not (0) -#endif -#ifndef STRN - #define STRN 0 // Use strided access within a thread in the N-dimension (1) or not (0) -#endif -#ifndef SA - #define SA 0 // Use local/shared memory to cache matrix A (1) or not (0) -#endif -#ifndef SB - #define SB 0 // Use local/shared memory to cache matrix B (1) or not (0) -#endif - -// Helper parameters based on the above tuning parameters -#define MWI (MWG/MDIMC) // Work per work-item (M-dimension) -#define NWI (NWG/NDIMC) // Work per work-item (N-dimension) -#define KDIMA ((MDIMC*NDIMC)/(MDIMA)) // Re-shaped tile dimension of matrix A: KDIMA * MDIMA -#define KDIMB ((MDIMC*NDIMC)/(NDIMB)) // Re-shaped tile dimension of matrix B: KDIMB * NDIMB -#define MWA (MWG/MDIMA) // Amount of loads-per-thread for matrix A (M-dimension) -#define KWA (KWG/KDIMA) // Amount of loads-per-thread for matrix A (K-dimension) -#define KWB (KWG/KDIMB) // Amount of loads-per-thread for matrix B (K-dimension) -#define NWB (NWG/NDIMB) // Amount of loads-per-thread for matrix B (N-dimension) - -// Settings -#define USE_VECTOR_MAD 0 // Unroll (0) or don't (1) unroll the vector MAD manually - -// 
================================================================================================= - -// Data-widths in dimension M -#if VWM == 1 - typedef real realM; -#elif VWM == 2 - typedef real2 realM; -#elif VWM == 4 - typedef real4 realM; -#elif VWM == 8 - typedef real8 realM; -#elif VWM == 16 - typedef real16 realM; -#endif - -// Data-widths in dimension N -#if VWN == 1 - typedef real realN; -#elif VWN == 2 - typedef real2 realN; -#elif VWN == 4 - typedef real4 realN; -#elif VWN == 8 - typedef real8 realN; -#elif VWN == 16 - typedef real16 realN; -#endif - -// ================================================================================================= - -// Initializes the accumulation registers to zero -inline void InitAccRegisters(realM cpm[NWI][MWI/VWM]) { - #pragma unroll - for (int mi=0; mi::Xgemm(Queue &queue, Event &event, const std::string &name): #include "../../kernels/level3/pad.opencl" #include "../../kernels/level3/transpose.opencl" #include "../../kernels/level3/padtranspose.opencl" - #include "../../kernels/level3/xgemm.opencl" + #include "../../kernels/level3/xgemm_part1.opencl" + #include "../../kernels/level3/xgemm_part2.opencl" ; } diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc index e9970fd1..1711905d 100644 --- a/src/routines/level3/xher2k.cc +++ b/src/routines/level3/xher2k.cc @@ -34,7 +34,8 @@ Xher2k::Xher2k(Queue &queue, Event &event, const std::string &name): #include "../../kernels/level3/pad.opencl" #include "../../kernels/level3/transpose.opencl" #include "../../kernels/level3/padtranspose.opencl" - #include "../../kernels/level3/xgemm.opencl" + #include "../../kernels/level3/xgemm_part1.opencl" + #include "../../kernels/level3/xgemm_part2.opencl" ; } diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc index 49fd12af..cbd0a188 100644 --- a/src/routines/level3/xherk.cc +++ b/src/routines/level3/xherk.cc @@ -34,7 +34,8 @@ Xherk::Xherk(Queue &queue, Event &event, const std::string 
&name): #include "../../kernels/level3/pad.opencl" #include "../../kernels/level3/transpose.opencl" #include "../../kernels/level3/padtranspose.opencl" - #include "../../kernels/level3/xgemm.opencl" + #include "../../kernels/level3/xgemm_part1.opencl" + #include "../../kernels/level3/xgemm_part2.opencl" ; } diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc index 966a000f..79090871 100644 --- a/src/routines/level3/xsyr2k.cc +++ b/src/routines/level3/xsyr2k.cc @@ -36,7 +36,8 @@ Xsyr2k::Xsyr2k(Queue &queue, Event &event, const std::string &name): #include "../../kernels/level3/pad.opencl" #include "../../kernels/level3/transpose.opencl" #include "../../kernels/level3/padtranspose.opencl" - #include "../../kernels/level3/xgemm.opencl" + #include "../../kernels/level3/xgemm_part1.opencl" + #include "../../kernels/level3/xgemm_part2.opencl" ; } diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc index 630cb731..ca429bd7 100644 --- a/src/routines/level3/xsyrk.cc +++ b/src/routines/level3/xsyrk.cc @@ -36,7 +36,8 @@ Xsyrk::Xsyrk(Queue &queue, Event &event, const std::string &name): #include "../../kernels/level3/pad.opencl" #include "../../kernels/level3/transpose.opencl" #include "../../kernels/level3/padtranspose.opencl" - #include "../../kernels/level3/xgemm.opencl" + #include "../../kernels/level3/xgemm_part1.opencl" + #include "../../kernels/level3/xgemm_part2.opencl" ; } diff --git a/src/tuning/xgemm.cc b/src/tuning/xgemm.cc index c06e3e72..2b4ff456 100644 --- a/src/tuning/xgemm.cc +++ b/src/tuning/xgemm.cc @@ -31,7 +31,8 @@ class TuneXgemm { static std::string GetSources() { return #include "../src/kernels/common.opencl" - #include "../src/kernels/level3/xgemm.opencl" + #include "../src/kernels/level3/xgemm_part1.opencl" + #include "../src/kernels/level3/xgemm_part2.opencl" ; } From fadd76207fed5aeb87de7caf744397b008c6d784 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Mon, 8 Feb 2016 20:44:05 +0100 Subject: [PATCH 28/50] 
Fixed warnings under MSVC --- test/performance/client.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/performance/client.cc b/test/performance/client.cc index c0c91aec..ce97d273 100644 --- a/test/performance/client.cc +++ b/test/performance/client.cc @@ -272,13 +272,13 @@ void Client::PrintTableRow(const Arguments& args, const double ms_clblas // Outputs the argument values for (auto &argument: integers) { if (!args.no_abbrv && argument >= 1024*1024 && IsMultiple(argument, 1024*1024)) { - fprintf(stdout, "%8luM;", argument/(1024*1024)); + fprintf(stdout, "%8zuM;", argument/(1024*1024)); } else if (!args.no_abbrv && argument >= 1024 && IsMultiple(argument, 1024)) { - fprintf(stdout, "%8luK;", argument/1024); + fprintf(stdout, "%8zuK;", argument/1024); } else { - fprintf(stdout, "%9lu;", argument); + fprintf(stdout, "%9zu;", argument); } } for (auto &argument: strings) { From c457a70aa13d5a1bf20996f82f3684786d27581d Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Wed, 10 Feb 2016 21:32:09 +0100 Subject: [PATCH 29/50] Updated the changelog --- CHANGELOG | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 15fe8a88..c6cfa174 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ Development version (next release) -- +- Added support for MSVC (Visual Studio) 2015 +- Added tuned parameters for various devices (see README) +- Now automatically generates C++ code from JSON tuning results Version 0.5.0 - Improved structure and performance of level-2 routines (xSYMV/xHEMV) From 8854a731276b3f32c9e381a228733de7c6d95760 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 20 Feb 2016 12:40:01 +0100 Subject: [PATCH 30/50] Added XGER routine, kernel, and tuner --- CMakeLists.txt | 4 +- include/internal/database.h | 1 + include/internal/database/xger.h | 88 ++++++++++++++ include/internal/routines/level2/xger.h | 58 +++++++++ scripts/database/database.py | 2 +- scripts/generator/generator.py | 2 
+- src/clblast.cc | 27 +++-- src/database.cc | 2 + src/kernels/common.opencl | 7 ++ src/kernels/level2/xger.opencl | 149 ++++++++++++++++++++++++ src/routines/level2/xger.cc | 107 +++++++++++++++++ src/tuning/xger.cc | 128 ++++++++++++++++++++ test/routines/level2/xgemv.h | 2 +- test/routines/level2/xger.h | 131 +++++++++++++++++++++ 14 files changed, 695 insertions(+), 13 deletions(-) create mode 100644 include/internal/database/xger.h create mode 100644 include/internal/routines/level2/xger.h create mode 100644 src/kernels/level2/xger.opencl create mode 100644 src/routines/level2/xger.cc create mode 100644 src/tuning/xger.cc create mode 100644 test/routines/level2/xger.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 5918d3eb..8e3313d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,11 +107,11 @@ include_directories(${clblast_SOURCE_DIR}/include ${OPENCL_INCLUDE_DIRS}) # ================================================================================================== # Sets the supported routines and the used kernels. New routines and kernels should be added here. 
-set(KERNELS copy pad transpose padtranspose xaxpy xdot xgemm xgemv) +set(KERNELS copy pad transpose padtranspose xaxpy xdot xger xgemm xgemv) set(SAMPLE_PROGRAMS_CPP sgemm) set(SAMPLE_PROGRAMS_C sgemm) set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc) -set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv) +set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv xger) set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm) set(ROUTINES ${LEVEL1_ROUTINES} ${LEVEL2_ROUTINES} ${LEVEL3_ROUTINES}) set(PRECISIONS 32 64 3232 6464) diff --git a/include/internal/database.h b/include/internal/database.h index 08e449fa..f26e354c 100644 --- a/include/internal/database.h +++ b/include/internal/database.h @@ -73,6 +73,7 @@ class Database { static const DatabaseEntry XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble; static const DatabaseEntry XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble; static const DatabaseEntry XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble; + static const DatabaseEntry XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble; static const DatabaseEntry XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble; static const DatabaseEntry CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble; static const DatabaseEntry PadSingle, PadDouble, PadComplexSingle, PadComplexDouble; diff --git a/include/internal/database/xger.h b/include/internal/database/xger.h new file mode 100644 index 00000000..c9cfb6cd --- /dev/null +++ b/include/internal/database/xger.h @@ -0,0 +1,88 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. 
+// +// Author(s): +// Database generator +// +// This file populates the database with best-found tuning parameters for the 'Xger' kernels. +// +// ================================================================================================= + +namespace clblast { +// ================================================================================================= + +const Database::DatabaseEntry Database::XgerSingle = { + "Xger", Precision::kSingle, { + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } }, + { "default", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XgerComplexSingle = { + "Xger", Precision::kComplexSingle, { + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, + { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::XgerDouble = { + "Xger", Precision::kDouble, { + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } }, + { "default", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry 
Database::XgerComplexDouble = { + "Xger", Precision::kComplexDouble, { + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } }, + } + }, + } +}; + +// ================================================================================================= +} // namespace clblast diff --git a/include/internal/routines/level2/xger.h b/include/internal/routines/level2/xger.h new file mode 100644 index 00000000..45ecea10 --- /dev/null +++ b/include/internal/routines/level2/xger.h @@ -0,0 +1,58 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xger routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGER_H_ +#define CLBLAST_ROUTINES_XGER_H_ + +#include "internal/routine.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class Xger: public Routine { + public: + + // Members and methods from the base class + using Routine::db_; + using Routine::source_string_; + using Routine::queue_; + using Routine::GetProgramFromCache; + using Routine::TestVectorX; + using Routine::TestVectorY; + using Routine::TestMatrixA; + using Routine::RunKernel; + using Routine::ErrorIn; + + // Constructor + Xger(Queue &queue, Event &event, const std::string &name = "GER"); + + // Templated-precision implementation of the routine + StatusCode DoGer(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld); + + private: + // Static variable to get the precision + const static Precision precision_; +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XGER_H_ +#endif diff --git a/scripts/database/database.py b/scripts/database/database.py index f2d47717..6f3ce85e 100644 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -281,7 +281,7 @@ if len(glob.glob(glob_json)) >= 1: SaveDatabase(database, file_db) # Retrieves the best performing results -print("## Calculting the best results per device/kernel...") +print("## Calculating the best results per device/kernel...") bests = GetBestResults(database) # Determines the defaults for other vendors and per vendor diff --git 
a/scripts/generator/generator.py b/scripts/generator/generator.py index 25f02861..6304f112 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -78,7 +78,7 @@ routines = [ Routine(False, "2a", "tbsv", T, [S,D,C,Z], ["n","k"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [], False, "Solves a banded triangular system of equations"), Routine(False, "2a", "tpsv", T, [S,D,C,Z], ["n"], ["layout","triangle","a_transpose","diagonal"], ["ap"], ["x"], [], False, "Solves a packed triangular system of equations"), # Level 2: matrix update - Routine(False, "2b", "ger", T, [S,D], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 matrix update"), + Routine(True, "2b", "ger", T, [S,D], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 matrix update"), Routine(False, "2b", "geru", T, [C,Z], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 complex matrix update"), Routine(False, "2b", "gerc", T, [C,Z], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 complex conjugated matrix update"), Routine(False, "2b", "her", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Hermitian rank-1 matrix update"), diff --git a/src/clblast.cc b/src/clblast.cc index 77999aaf..aed3f141 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -38,6 +38,7 @@ #include "internal/routines/level2/xtrmv.h" #include "internal/routines/level2/xtbmv.h" #include "internal/routines/level2/xtpmv.h" +#include "internal/routines/level2/xger.h" // BLAS level-3 includes #include "internal/routines/level3/xgemm.h" @@ -835,14 +836,24 @@ template StatusCode Tpsv(const Layout, const Triangle, const Transpose, // General rank-1 matrix update: SGER/DGER template -StatusCode Ger(const Layout, - const size_t, const size_t, - const T, - const cl_mem, const size_t, const size_t, - const cl_mem, const size_t, const size_t, - cl_mem, const size_t, const size_t, - 
cl_command_queue*, cl_event*) { - return StatusCode::kNotImplemented; +StatusCode Ger(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = Queue(*queue); + auto event_cpp = Event(*event); + auto routine = Xger(queue_cpp, event_cpp); + auto status = routine.SetUp(); + if (status != StatusCode::kSuccess) { return status; } + return routine.DoGer(layout, + m, n, + alpha, + Buffer(x_buffer), x_offset, x_inc, + Buffer(y_buffer), y_offset, y_inc, + Buffer(a_buffer), a_offset, a_ld); } template StatusCode Ger(const Layout, const size_t, const size_t, diff --git a/src/database.cc b/src/database.cc index ba0a56d9..addd85d3 100644 --- a/src/database.cc +++ b/src/database.cc @@ -15,6 +15,7 @@ #include "internal/database/xaxpy.h" #include "internal/database/xdot.h" #include "internal/database/xgemv.h" +#include "internal/database/xger.h" #include "internal/database/xgemm.h" #include "internal/database/copy.h" #include "internal/database/pad.h" @@ -31,6 +32,7 @@ const std::vector Database::database = { XaxpySingle, XaxpyDouble, XaxpyComplexSingle, XaxpyComplexDouble, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble, + XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble, diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl index f2a2e7a7..973c123e 100644 --- a/src/kernels/common.opencl +++ b/src/kernels/common.opencl @@ -147,6 +147,13 @@ R"( #define AXPBY(e, a, b, c, d) e = a*b + c*d #endif +// The scalar GER function 
+#if PRECISION == 3232 || PRECISION == 6464 + #define GER(e, a, b, c, d) real ab; ab.x = MulReal(a,b); ab.y = MulImag(a,b); e.x = MulReal(ab,c) + d.x; e.y = MulImag(ab,c) + d.y +#else + #define GER(e, a, b, c, d) e = a*b*c + d +#endif + // The complex conjugate operation for complex transforms #if PRECISION == 3232 || PRECISION == 6464 #define COMPLEX_CONJUGATE(value) value.x = value.x; value.y = -value.y diff --git a/src/kernels/level2/xger.opencl b/src/kernels/level2/xger.opencl new file mode 100644 index 00000000..aa765b6c --- /dev/null +++ b/src/kernels/level2/xger.opencl @@ -0,0 +1,149 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file contains the Xger kernel (generic version) for rank-1 matrix update. +// +// ================================================================================================= + +// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string +// literal). Comment-out this line for syntax-highlighting when developing. +R"( + +// ================================================================================================= + +// Parameters set by the tuner or by the database. Here they are given a basic default value in case +// this kernel file is used outside of the CLBlast library. 
+ +#ifndef WGS1 + #define WGS1 8 // The local work-group size in first dimension +#endif +#ifndef WGS2 + #define WGS2 8 // The local work-group size in second dimension +#endif +#ifndef WPT + #define WPT 1 // The amount of work-per-thread in both dimensions +#endif + +// ================================================================================================= + +// Row-major version of the kernel +__attribute__((reqd_work_group_size(WGS1, WGS2, 1))) +__kernel void Xger(const int max_one, const int max_two, const real alpha, + const __global real* restrict xgm, const int x_offset, const int x_inc, + const __global real* ygm, const int y_offset, const int y_inc, + __global real* restrict agm, const int a_offset, const int a_ld, + const int is_rowmajor) { + + // Register storage for X and Y + real xvalues[WPT]; + real yvalues[WPT]; + + // Row-major version + if (is_rowmajor) { + + // Loads the X-vector + #pragma unroll + for (int w=0; w +// +// This file implements the Xger class (see the header for information about the class). 
+// +// ================================================================================================= + +#include "internal/routines/level2/xger.h" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Specific implementations to get the memory-type based on a template argument +template <> const Precision Xger::precision_ = Precision::kSingle; +template <> const Precision Xger::precision_ = Precision::kDouble; + +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xger::Xger(Queue &queue, Event &event, const std::string &name): + Routine(queue, event, name, {"Xger"}, precision_) { + source_string_ = + #include "../../kernels/level2/xger.opencl" + ; +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xger::DoGer(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { + + // Makes sure all dimensions are larger than zero + if (m == 0 || n == 0) { return StatusCode::kInvalidDimension; } + + // Computes whether or not the matrix has an alternative layout (row or column-major). + const auto a_is_rowmajor = (layout == Layout::kRowMajor); + const auto a_one = (a_is_rowmajor) ? n : m; + const auto a_two = (a_is_rowmajor) ? 
m : n; + + // Tests the matrix and the vectors for validity + auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); + if (ErrorIn(status)) { return status; } + status = TestVectorX(m, x_buffer, x_offset, x_inc, sizeof(T)); + if (ErrorIn(status)) { return status; } + status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T)); + if (ErrorIn(status)) { return status; } + + // Retrieves the Xger kernel from the compiled binary + try { + auto& program = GetProgramFromCache(); + auto kernel = Kernel(program, "Xger"); + + // Sets the kernel arguments + kernel.SetArgument(0, static_cast(a_one)); + kernel.SetArgument(1, static_cast(a_two)); + kernel.SetArgument(2, alpha); + kernel.SetArgument(3, x_buffer()); + kernel.SetArgument(4, static_cast(x_offset)); + kernel.SetArgument(5, static_cast(x_inc)); + kernel.SetArgument(6, y_buffer()); + kernel.SetArgument(7, static_cast(y_offset)); + kernel.SetArgument(8, static_cast(y_inc)); + kernel.SetArgument(9, a_buffer()); + kernel.SetArgument(10, static_cast(a_offset)); + kernel.SetArgument(11, static_cast(a_ld)); + kernel.SetArgument(12, static_cast(a_is_rowmajor)); + + // Launches the kernel + auto a_one_ceiled = CeilDiv(Ceil(a_one, db_["WGS1"]), db_["WPT"]); + auto a_two_ceiled = CeilDiv(Ceil(a_two, db_["WGS2"]), db_["WPT"]); + auto global = std::vector{a_one_ceiled, a_two_ceiled}; + auto local = std::vector{db_["WGS1"], db_["WGS2"]}; + status = RunKernel(kernel, global, local); + if (ErrorIn(status)) { return status; } + + // Waits for all kernels to finish + queue_.Finish(); + + // Successfully finished the computation + return StatusCode::kSuccess; + } catch (...)
{ return StatusCode::kInvalidKernel; } +} + +// ================================================================================================= + +// Compiles the templated class +template class Xger; +template class Xger; + +// ================================================================================================= +} // namespace clblast diff --git a/src/tuning/xger.cc b/src/tuning/xger.cc new file mode 100644 index 00000000..a47f5a78 --- /dev/null +++ b/src/tuning/xger.cc @@ -0,0 +1,128 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file uses the CLTune auto-tuner to tune the xger OpenCL kernels. +// +// ================================================================================================= + +#include +#include + +#include "internal/utilities.h" +#include "internal/tuning.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class TuneXger { + public: + + // The representative kernel and the source code + static std::string KernelFamily() { return "xger"; } + static std::string KernelName() { return "Xger"; } + static std::string GetSources() { + return + #include "../src/kernels/common.opencl" + #include "../src/kernels/level2/xger.opencl" + ; + } + + // The list of arguments relevant for this routine + static std::vector GetOptions() { return {kArgN, kArgM, kArgAlpha}; } + + // Tests for valid arguments + static void TestValidArguments(const Arguments &) { } + + // Sets the default values for the arguments + static size_t DefaultM() { return 1024; } 
+ static size_t DefaultN() { return 1024; } + static size_t DefaultK() { return 1; } // N/A for this kernel + static double DefaultFraction() { return 1.0; } // N/A for this kernel + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments &args) { return args.m; } + static size_t GetSizeY(const Arguments &args) { return args.n; } + static size_t GetSizeA(const Arguments &args) { return args.m * args.n; } + static size_t GetSizeB(const Arguments &) { return 1; } // N/A for this kernel + static size_t GetSizeC(const Arguments &) { return 1; } // N/A for this kernel + static size_t GetSizeTemp(const Arguments &) { return 1; } // N/A for this kernel + + // Sets the tuning parameters and their possible values + static void SetParameters(cltune::Tuner &tuner, const size_t id) { + tuner.AddParameter(id, "WGS1", {4, 8, 16, 32, 64, 128, 256, 512}); + tuner.AddParameter(id, "WGS2", {1, 2, 4, 8, 16, 32, 64, 128, 256}); + tuner.AddParameter(id, "WPT", {1, 2, 4}); + } + + // Sets the constraints and local memory size + static void SetConstraints(cltune::Tuner &, const size_t) { } + static void SetLocalMemorySize(cltune::Tuner &, const size_t, const Arguments &) { } + + // Sets the base thread configuration + static std::vector GlobalSize(const Arguments &args) { return {args.m, args.n}; } + static std::vector GlobalSizeRef(const Arguments &args) { return GlobalSize(args); } + static std::vector LocalSize() { return {1, 1}; } + static std::vector LocalSizeRef() { return {8, 8}; } + + // Transforms the thread configuration based on the parameters + using TransformVector = std::vector>; + static TransformVector MulLocal() { return {{"WGS1", "WGS2"}}; } + static TransformVector DivLocal() { return {}; } + static TransformVector MulGlobal() { return {}; } + static TransformVector DivGlobal() { return {{"WPT", "WPT"}}; } + + // Sets the kernel's arguments + static void SetArguments(cltune::Tuner &tuner, const Arguments &args, + std::vector 
&x_vec, std::vector &y_vec, + std::vector &a_mat, std::vector &, std::vector &, + std::vector &) { + tuner.AddArgumentScalar(static_cast(args.m)); + tuner.AddArgumentScalar(static_cast(args.n)); + tuner.AddArgumentScalar(args.alpha); + tuner.AddArgumentInput(x_vec); + tuner.AddArgumentScalar(0); // x_offset + tuner.AddArgumentScalar(1); // x_increment + tuner.AddArgumentInput(y_vec); + tuner.AddArgumentScalar(0); // y_offset + tuner.AddArgumentScalar(1); // y_increment + tuner.AddArgumentOutput(a_mat); + tuner.AddArgumentScalar(0); // a_offset + tuner.AddArgumentScalar(static_cast(args.m)); // a_ld + tuner.AddArgumentScalar(0); // a_is_rowmajor + } + + // Describes how to compute the performance metrics + static size_t GetMetric(const Arguments &args) { + return (2*args.m*args.n + args.m + args.n) * GetBytes(args.precision); + } + static std::string PerformanceUnit() { return "GB/s"; } +}; + +// ================================================================================================= +} // namespace clblast + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + switch(clblast::GetPrecision(argc, argv)) { + case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kSingle: clblast::Tuner, float>(argc, argv); break; + case clblast::Precision::kDouble: clblast::Tuner, double>(argc, argv); break; + case clblast::Precision::kComplexSingle: clblast::Tuner, float2>(argc, argv); break; + case clblast::Precision::kComplexDouble: clblast::Tuner, double2>(argc, argv); break; + } + return 0; +} + +// ================================================================================================= diff --git a/test/routines/level2/xgemv.h b/test/routines/level2/xgemv.h index 927bfaee..2924d498 100644 --- a/test/routines/level2/xgemv.h +++ b/test/routines/level2/xgemv.h 
@@ -35,7 +35,7 @@ class TestXgemv { // The list of arguments relevant for this routine static std::vector GetOptions() { return {kArgM, kArgN, - kArgLayout, kArgATransp, + kArgLayout, kArgATransp, kArgALeadDim, kArgXInc, kArgYInc, kArgAOffset, kArgXOffset, kArgYOffset, kArgAlpha, kArgBeta}; diff --git a/test/routines/level2/xger.h b/test/routines/level2/xger.h new file mode 100644 index 00000000..98296e92 --- /dev/null +++ b/test/routines/level2/xger.h @@ -0,0 +1,131 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements a class with static methods to describe the Xger routine. Examples of +// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. 
+// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XGER_H_ +#define CLBLAST_TEST_ROUTINES_XGER_H_ + +#include +#include + +#include "wrapper_clblas.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class TestXger { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 2; } + + // The list of arguments relevant for this routine + static std::vector GetOptions() { + return {kArgM, kArgN, + kArgLayout, + kArgALeadDim, kArgXInc, kArgYInc, + kArgAOffset, kArgXOffset, kArgYOffset, + kArgAlpha}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments &args) { + return args.m * args.x_inc + args.x_offset; + } + static size_t GetSizeY(const Arguments &args) { + return args.n * args.y_inc + args.y_offset; + } + static size_t GetSizeA(const Arguments &args) { + auto a_rotated = (args.layout == Layout::kRowMajor); + auto a_two = (a_rotated) ? 
args.m : args.n; + return a_two * args.a_ld + args.a_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments &args) { + args.a_size = GetSizeA(args); + args.x_size = GetSizeX(args); + args.y_size = GetSizeY(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments &args) { return args.n; } + static size_t DefaultLDB(const Arguments &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector; + static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Ger(args.layout, + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + clWaitForEvents(1, &event); + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXger(static_cast(args.layout), + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + + // Describes how to download the results of the computation (more 
importantly: which buffer) + static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector result(args.a_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments &args) { return args.m; } + static size_t ResultID2(const Arguments &args) { return args.n; } + static size_t GetResultIndex(const Arguments &args, const size_t id1, const size_t id2) { + return (args.layout == Layout::kRowMajor) ? + id1*args.a_ld + id2 + args.a_offset: + id2*args.a_ld + id1 + args.a_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments &args) { + return 2 * args.m * args.n; + } + static size_t GetBytes(const Arguments &args) { + return (2*args.m*args.n + args.m + args.n) * sizeof(T); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XGER_H_ +#endif From 6dc44da07bc0209a399a3e40300aa859e41034d9 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 20 Feb 2016 14:15:41 +0100 Subject: [PATCH 31/50] Added support for xGERU and xGERC routines --- CMakeLists.txt | 3 +- README.md | 8 +- include/internal/routines/level2/xgerc.h | 46 ++++++++ include/internal/routines/level2/xgeru.h | 46 ++++++++ scripts/generator/generator.py | 4 +- src/clblast.cc | 54 +++++++--- src/kernels/level2/xger.opencl | 6 ++ src/routines/level1/xdotu.cc | 1 - src/routines/level2/xger.cc | 4 + src/routines/level2/xgerc.cc | 53 +++++++++ src/routines/level2/xgeru.cc | 52 +++++++++ test/routines/level2/xgerc.h | 131 +++++++++++++++++++++++ test/routines/level2/xgeru.h | 131 +++++++++++++++++++++++ 13 files changed, 515 insertions(+), 24 deletions(-) create mode 100644 include/internal/routines/level2/xgerc.h create mode 100644 include/internal/routines/level2/xgeru.h create 
mode 100644 src/routines/level2/xgerc.cc create mode 100644 src/routines/level2/xgeru.cc create mode 100644 test/routines/level2/xgerc.h create mode 100644 test/routines/level2/xgeru.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e3313d9..342b72e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,8 @@ set(KERNELS copy pad transpose padtranspose xaxpy xdot xger xgemm xgemv) set(SAMPLE_PROGRAMS_CPP sgemm) set(SAMPLE_PROGRAMS_C sgemm) set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc) -set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv xger) +set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv + xger xgeru xgerc) set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm) set(ROUTINES ${LEVEL1_ROUTINES} ${LEVEL2_ROUTINES} ${LEVEL3_ROUTINES}) set(PRECISIONS 32 64 3232 6464) diff --git a/README.md b/README.md index 2add9798..ae228836 100644 --- a/README.md +++ b/README.md @@ -155,7 +155,7 @@ These graphs can be generated automatically on your own device. First, compile C Supported routines ------------- -CLBlast is in active development but already supports the majority of BLAS routines. The currently supported routines are marked with '✔' in the following tables: +CLBlast is in active development but already supports almost all the BLAS routines. 
The currently supported routines are marked with '✔' in the following tables: | Level-1 | S | D | C | Z | Notes | | ---------|---|---|---|---|---------| @@ -191,9 +191,9 @@ CLBlast is in active development but already supports the majority of BLAS routi | xTRSV | | | | | | | xTBSV | | | | | | | xTPSV | | | | | | -| xGER | | | - | - | | -| xGERU | - | - | | | | -| xGERC | - | - | | | | +| xGER | ✔ | ✔ | - | - | | +| xGERU | - | - | ✔ | ✔ | | +| xGERC | - | - | ✔ | ✔ | | | xHER | - | - | | | | | xHPR | - | - | | | | | xHER2 | - | - | | | | diff --git a/include/internal/routines/level2/xgerc.h b/include/internal/routines/level2/xgerc.h new file mode 100644 index 00000000..8e515a14 --- /dev/null +++ b/include/internal/routines/level2/xgerc.h @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xgerc routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGERC_H_ +#define CLBLAST_ROUTINES_XGERC_H_ + +#include "internal/routines/level2/xger.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class Xgerc: public Xger { + public: + + // Uses the regular Xger routine + using Xger::DoGer; + + // Constructor + Xgerc(Queue &queue, Event &event, const std::string &name = "GERC"); + + // Templated-precision implementation of the routine + StatusCode DoGerc(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XGERC_H_ +#endif diff --git a/include/internal/routines/level2/xgeru.h b/include/internal/routines/level2/xgeru.h new file mode 100644 index 00000000..ec485c37 --- /dev/null +++ b/include/internal/routines/level2/xgeru.h @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xgeru routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGERU_H_ +#define CLBLAST_ROUTINES_XGERU_H_ + +#include "internal/routines/level2/xger.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class Xgeru: public Xger { + public: + + // Uses the regular Xger routine + using Xger::DoGer; + + // Constructor + Xgeru(Queue &queue, Event &event, const std::string &name = "GERU"); + + // Templated-precision implementation of the routine + StatusCode DoGeru(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XGERU_H_ +#endif diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 6304f112..93ff8680 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -79,8 +79,8 @@ routines = [ Routine(False, "2a", "tpsv", T, [S,D,C,Z], ["n"], ["layout","triangle","a_transpose","diagonal"], ["ap"], ["x"], [], False, "Solves a packed triangular system of equations"), # Level 2: matrix update Routine(True, "2b", "ger", T, [S,D], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 matrix update"), - Routine(False, "2b", "geru", T, [C,Z], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 complex matrix update"), - Routine(False, "2b", "gerc", T, [C,Z], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 complex conjugated matrix update"), + Routine(True, "2b", 
"geru", T, [C,Z], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 complex matrix update"), + Routine(True, "2b", "gerc", T, [C,Z], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 complex conjugated matrix update"), Routine(False, "2b", "her", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Hermitian rank-1 matrix update"), Routine(False, "2b", "hpr", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["ap"], ["alpha"], False, "Hermitian packed rank-1 matrix update"), Routine(False, "2b", "her2", T, [C,Z], ["n"], ["layout","triangle"], ["x","y"], ["a"], ["alpha"], False, "Hermitian rank-2 matrix update"), diff --git a/src/clblast.cc b/src/clblast.cc index aed3f141..47f2c59d 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -39,6 +39,8 @@ #include "internal/routines/level2/xtbmv.h" #include "internal/routines/level2/xtpmv.h" #include "internal/routines/level2/xger.h" +#include "internal/routines/level2/xgeru.h" +#include "internal/routines/level2/xgerc.h" // BLAS level-3 includes #include "internal/routines/level3/xgemm.h" @@ -872,14 +874,24 @@ template StatusCode Ger(const Layout, // General rank-1 complex matrix update: CGERU/ZGERU template -StatusCode Geru(const Layout, - const size_t, const size_t, - const T, - const cl_mem, const size_t, const size_t, - const cl_mem, const size_t, const size_t, - cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*) { - return StatusCode::kNotImplemented; +StatusCode Geru(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = Queue(*queue); + auto event_cpp = Event(*event); + auto routine = Xgeru(queue_cpp, event_cpp); + auto status = routine.SetUp(); + if 
(status != StatusCode::kSuccess) { return status; } + return routine.DoGeru(layout, + m, n, + alpha, + Buffer(x_buffer), x_offset, x_inc, + Buffer(y_buffer), y_offset, y_inc, + Buffer(a_buffer), a_offset, a_ld); } template StatusCode Geru(const Layout, const size_t, const size_t, @@ -898,14 +910,24 @@ template StatusCode Geru(const Layout, // General rank-1 complex conjugated matrix update: CGERC/ZGERC template -StatusCode Gerc(const Layout, - const size_t, const size_t, - const T, - const cl_mem, const size_t, const size_t, - const cl_mem, const size_t, const size_t, - cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*) { - return StatusCode::kNotImplemented; +StatusCode Gerc(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = Queue(*queue); + auto event_cpp = Event(*event); + auto routine = Xgerc(queue_cpp, event_cpp); + auto status = routine.SetUp(); + if (status != StatusCode::kSuccess) { return status; } + return routine.DoGerc(layout, + m, n, + alpha, + Buffer(x_buffer), x_offset, x_inc, + Buffer(y_buffer), y_offset, y_inc, + Buffer(a_buffer), a_offset, a_ld); } template StatusCode Gerc(const Layout, const size_t, const size_t, diff --git a/src/kernels/level2/xger.opencl b/src/kernels/level2/xger.opencl index aa765b6c..c1cddea1 100644 --- a/src/kernels/level2/xger.opencl +++ b/src/kernels/level2/xger.opencl @@ -62,6 +62,9 @@ __kernel void Xger(const int max_one, const int max_two, const real alpha, const int id1 = w*get_global_size(0) + get_global_id(0); if (id1 < max_one) { yvalues[w] = ygm[id1*y_inc + y_offset]; + #if defined(ROUTINE_GERC) + COMPLEX_CONJUGATE(yvalues[w]); + #endif } } @@ -110,6 +113,9 @@ __kernel void Xger(const int max_one, const int max_two, 
const real alpha, const int id2 = w*get_global_size(1) + get_global_id(1); if (id2 < max_two) { yvalues[w] = ygm[id2*y_inc + y_offset]; + #if defined(ROUTINE_GERC) + COMPLEX_CONJUGATE(yvalues[w]); + #endif } } diff --git a/src/routines/level1/xdotu.cc b/src/routines/level1/xdotu.cc index 0b1bd2a8..28d9b730 100644 --- a/src/routines/level1/xdotu.cc +++ b/src/routines/level1/xdotu.cc @@ -14,7 +14,6 @@ #include "internal/routines/level1/xdotu.h" #include -#include namespace clblast { // ================================================================================================= diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc index c3a24264..55fa26d4 100644 --- a/src/routines/level2/xger.cc +++ b/src/routines/level2/xger.cc @@ -22,6 +22,8 @@ namespace clblast { // Specific implementations to get the memory-type based on a template argument template <> const Precision Xger::precision_ = Precision::kSingle; template <> const Precision Xger::precision_ = Precision::kDouble; +template <> const Precision Xger::precision_ = Precision::kComplexSingle; +template <> const Precision Xger::precision_ = Precision::kComplexDouble; // ================================================================================================= @@ -102,6 +104,8 @@ StatusCode Xger::DoGer(const Layout layout, // Compiles the templated class template class Xger; template class Xger; +template class Xger; +template class Xger; // ================================================================================================= } // namespace clblast diff --git a/src/routines/level2/xgerc.cc b/src/routines/level2/xgerc.cc new file mode 100644 index 00000000..09408898 --- /dev/null +++ b/src/routines/level2/xgerc.cc @@ -0,0 +1,53 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xgerc class (see the header for information about the class). +// +// ================================================================================================= + +#include "internal/routines/level2/xgerc.h" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xgerc::Xgerc(Queue &queue, Event &event, const std::string &name): + Xger(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xgerc::DoGerc(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { + + // Regular Ger operation on complex data, plus conjugation in the kernel guarded by the + // ROUTINE_GERC guard. 
+ return DoGer(layout, m, n, alpha, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + a_buffer, a_offset, a_ld); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xgerc; +template class Xgerc; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xgeru.cc b/src/routines/level2/xgeru.cc new file mode 100644 index 00000000..36fd9d0a --- /dev/null +++ b/src/routines/level2/xgeru.cc @@ -0,0 +1,52 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xgeru class (see the header for information about the class). 
+// +// ================================================================================================= + +#include "internal/routines/level2/xgeru.h" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xgeru::Xgeru(Queue &queue, Event &event, const std::string &name): + Xger(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xgeru::DoGeru(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { + + // Regular Ger operation on complex data + return DoGer(layout, m, n, alpha, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + a_buffer, a_offset, a_ld); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xgeru; +template class Xgeru; + +// ================================================================================================= +} // namespace clblast diff --git a/test/routines/level2/xgerc.h b/test/routines/level2/xgerc.h new file mode 100644 index 00000000..77258d92 --- /dev/null +++ b/test/routines/level2/xgerc.h @@ -0,0 +1,131 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. 
+// +// Author(s): +// Cedric Nugteren +// +// This file implements a class with static methods to describe the Xgerc routine. Examples of +// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XGERC_H_ +#define CLBLAST_TEST_ROUTINES_XGERC_H_ + +#include +#include + +#include "wrapper_clblas.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class TestXgerc { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 2; } + + // The list of arguments relevant for this routine + static std::vector GetOptions() { + return {kArgM, kArgN, + kArgLayout, + kArgALeadDim, kArgXInc, kArgYInc, + kArgAOffset, kArgXOffset, kArgYOffset, + kArgAlpha}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments &args) { + return args.m * args.x_inc + args.x_offset; + } + static size_t GetSizeY(const Arguments &args) { + return args.n * args.y_inc + args.y_offset; + } + static size_t GetSizeA(const Arguments &args) { + auto a_rotated = (args.layout == Layout::kRowMajor); + auto a_two = (a_rotated) ? 
args.m : args.n; + return a_two * args.a_ld + args.a_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments &args) { + args.a_size = GetSizeA(args); + args.x_size = GetSizeX(args); + args.y_size = GetSizeY(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments &args) { return args.n; } + static size_t DefaultLDB(const Arguments &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector; + static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Gerc(args.layout, + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + clWaitForEvents(1, &event); + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXgerc(static_cast(args.layout), + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + + // Describes how to download the results of the computation (more 
importantly: which buffer) + static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector result(args.a_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments &args) { return args.m; } + static size_t ResultID2(const Arguments &args) { return args.n; } + static size_t GetResultIndex(const Arguments &args, const size_t id1, const size_t id2) { + return (args.layout == Layout::kRowMajor) ? + id1*args.a_ld + id2 + args.a_offset: + id2*args.a_ld + id1 + args.a_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments &args) { + return 2 * args.m * args.n; + } + static size_t GetBytes(const Arguments &args) { + return (2*args.m*args.n + args.m + args.n) * sizeof(T); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XGERC_H_ +#endif diff --git a/test/routines/level2/xgeru.h b/test/routines/level2/xgeru.h new file mode 100644 index 00000000..e5f5f235 --- /dev/null +++ b/test/routines/level2/xgeru.h @@ -0,0 +1,131 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements a class with static methods to describe the Xgeru routine. Examples of +// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. 
+// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XGERU_H_ +#define CLBLAST_TEST_ROUTINES_XGERU_H_ + +#include +#include + +#include "wrapper_clblas.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class TestXgeru { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 2; } + + // The list of arguments relevant for this routine + static std::vector GetOptions() { + return {kArgM, kArgN, + kArgLayout, + kArgALeadDim, kArgXInc, kArgYInc, + kArgAOffset, kArgXOffset, kArgYOffset, + kArgAlpha}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments &args) { + return args.m * args.x_inc + args.x_offset; + } + static size_t GetSizeY(const Arguments &args) { + return args.n * args.y_inc + args.y_offset; + } + static size_t GetSizeA(const Arguments &args) { + auto a_rotated = (args.layout == Layout::kRowMajor); + auto a_two = (a_rotated) ? 
args.m : args.n; + return a_two * args.a_ld + args.a_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments &args) { + args.a_size = GetSizeA(args); + args.x_size = GetSizeX(args); + args.y_size = GetSizeY(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments &args) { return args.n; } + static size_t DefaultLDB(const Arguments &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector; + static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Geru(args.layout, + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + clWaitForEvents(1, &event); + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXgeru(static_cast(args.layout), + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + + // Describes how to download the results of the computation (more 
importantly: which buffer) + static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector result(args.a_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments &args) { return args.m; } + static size_t ResultID2(const Arguments &args) { return args.n; } + static size_t GetResultIndex(const Arguments &args, const size_t id1, const size_t id2) { + return (args.layout == Layout::kRowMajor) ? + id1*args.a_ld + id2 + args.a_offset: + id2*args.a_ld + id1 + args.a_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments &args) { + return 2 * args.m * args.n; + } + static size_t GetBytes(const Arguments &args) { + return (2*args.m*args.n + args.m + args.n) * sizeof(T); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XGERU_H_ +#endif From 9f682aa66bf38828e915431fe61603f535c61363 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 20 Feb 2016 14:41:53 +0100 Subject: [PATCH 32/50] Set a proper default precision for the CLBlast clients --- include/internal/utilities.h | 3 ++- scripts/generator/generator.py | 24 +++++++++++++--------- src/utilities.cc | 4 ++-- test/performance/routines/level1/xaxpy.cc | 2 +- test/performance/routines/level1/xcopy.cc | 2 +- test/performance/routines/level1/xdot.cc | 2 +- test/performance/routines/level1/xdotc.cc | 2 +- test/performance/routines/level1/xdotu.cc | 2 +- test/performance/routines/level1/xscal.cc | 2 +- test/performance/routines/level1/xswap.cc | 2 +- test/performance/routines/level2/xgbmv.cc | 2 +- test/performance/routines/level2/xgemv.cc | 2 +- test/performance/routines/level2/xger.cc | 2 +- test/performance/routines/level2/xgerc.cc | 2 +- 
test/performance/routines/level2/xgeru.cc | 2 +- test/performance/routines/level2/xhbmv.cc | 2 +- test/performance/routines/level2/xhemv.cc | 2 +- test/performance/routines/level2/xher.cc | 2 +- test/performance/routines/level2/xher2.cc | 2 +- test/performance/routines/level2/xhpmv.cc | 2 +- test/performance/routines/level2/xhpr.cc | 2 +- test/performance/routines/level2/xhpr2.cc | 2 +- test/performance/routines/level2/xsbmv.cc | 2 +- test/performance/routines/level2/xspmv.cc | 2 +- test/performance/routines/level2/xspr.cc | 2 +- test/performance/routines/level2/xspr2.cc | 2 +- test/performance/routines/level2/xsymv.cc | 2 +- test/performance/routines/level2/xsyr.cc | 2 +- test/performance/routines/level2/xsyr2.cc | 2 +- test/performance/routines/level2/xtbmv.cc | 2 +- test/performance/routines/level2/xtbsv.cc | 2 +- test/performance/routines/level2/xtpmv.cc | 2 +- test/performance/routines/level2/xtpsv.cc | 2 +- test/performance/routines/level2/xtrmv.cc | 2 +- test/performance/routines/level2/xtrsv.cc | 2 +- test/performance/routines/level3/xgemm.cc | 2 +- test/performance/routines/level3/xhemm.cc | 2 +- test/performance/routines/level3/xher2k.cc | 2 +- test/performance/routines/level3/xherk.cc | 2 +- test/performance/routines/level3/xsymm.cc | 2 +- test/performance/routines/level3/xsyr2k.cc | 2 +- test/performance/routines/level3/xsyrk.cc | 2 +- test/performance/routines/level3/xtrmm.cc | 2 +- test/performance/routines/level3/xtrsm.cc | 2 +- 44 files changed, 59 insertions(+), 54 deletions(-) diff --git a/include/internal/utilities.h b/include/internal/utilities.h index ed17271f..b6307a85 100644 --- a/include/internal/utilities.h +++ b/include/internal/utilities.h @@ -171,7 +171,8 @@ T GetArgument(const int argc, char *argv[], std::string &help, const std::string &option, const T default_value); // Returns the precision only -Precision GetPrecision(const int argc, char *argv[]); +Precision GetPrecision(const int argc, char *argv[], + const Precision 
default_precision = Precision::kSingle); // As in "GetArgument", but now only checks whether an argument is given or not bool CheckArgument(const int argc, char *argv[], std::string &help, const std::string &option); diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 93ff8680..382d728a 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -103,7 +103,17 @@ routines = [ ]] # ================================================================================================== +# Translates an option name to a CLBlast data-type +def PrecisionToFullName(x): + return { + 'H': "Half", + 'S': "Single", + 'D': "Double", + 'C': "ComplexSingle", + 'Z': "ComplexDouble", + }[x] +# ================================================================================================== # Separators for the BLAS levels separators = [""" // ================================================================================================= @@ -237,7 +247,7 @@ files = [ path_clblast+"/src/clblast_c.cc", path_clblast+"/test/wrapper_clblas.h", ] -header_lines = [84, 52, 80, 24, 22] +header_lines = [84, 55, 80, 24, 22] footer_lines = [6, 3, 5, 2, 6] # Checks whether the command-line arguments are valid; exists otherwise @@ -315,16 +325,10 @@ for level in [1,2,3]: body += "using double2 = clblast::double2;\n\n" body += "// Main function (not within the clblast namespace)\n" body += "int main(int argc, char *argv[]) {\n" - body += " switch(clblast::GetPrecision(argc, argv)) {\n" + default = PrecisionToFullName(routine.flavours[0].name) + body += " switch(clblast::GetPrecision(argc, argv, clblast::Precision::k"+default+")) {\n" for precision in ["H","S","D","C","Z"]: - enum = { - 'H': "Half", - 'S': "Single", - 'D': "Double", - 'C': "ComplexSingle", - 'Z': "ComplexDouble", - }[precision] - body += " case clblast::Precision::k"+enum+":" + body += " case clblast::Precision::k"+PrecisionToFullName(precision)+":" found = False for flavour in 
routine.flavours: if flavour.name == precision: diff --git a/src/utilities.cc b/src/utilities.cc index 24efb14c..68a4f02a 100644 --- a/src/utilities.cc +++ b/src/utilities.cc @@ -161,9 +161,9 @@ template Precision GetArgument(const int, char **, std::string&, cons // ================================================================================================= // Returns only the precision argument -Precision GetPrecision(const int argc, char *argv[]) { +Precision GetPrecision(const int argc, char *argv[], const Precision default_precision) { auto dummy = std::string{}; - return GetArgument(argc, argv, dummy, kArgPrecision, Precision::kSingle); + return GetArgument(argc, argv, dummy, kArgPrecision, default_precision); } // ================================================================================================= diff --git a/test/performance/routines/level1/xaxpy.cc b/test/performance/routines/level1/xaxpy.cc index 7ab15f28..b423bc3a 100644 --- a/test/performance/routines/level1/xaxpy.cc +++ b/test/performance/routines/level1/xaxpy.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level1/xcopy.cc b/test/performance/routines/level1/xcopy.cc index 6277e8fb..c04c6c1c 100644 --- a/test/performance/routines/level1/xcopy.cc +++ b/test/performance/routines/level1/xcopy.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case 
clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level1/xdot.cc b/test/performance/routines/level1/xdot.cc index 5aa76762..f4616464 100644 --- a/test/performance/routines/level1/xdot.cc +++ b/test/performance/routines/level1/xdot.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level1/xdotc.cc b/test/performance/routines/level1/xdotc.cc index 81511085..5f36b80e 100644 --- a/test/performance/routines/level1/xdotc.cc +++ b/test/performance/routines/level1/xdotc.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git a/test/performance/routines/level1/xdotu.cc b/test/performance/routines/level1/xdotu.cc index 888eede3..f19f751b 100644 --- a/test/performance/routines/level1/xdotu.cc +++ b/test/performance/routines/level1/xdotu.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, 
argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git a/test/performance/routines/level1/xscal.cc b/test/performance/routines/level1/xscal.cc index be49c066..bd38f43e 100644 --- a/test/performance/routines/level1/xscal.cc +++ b/test/performance/routines/level1/xscal.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level1/xswap.cc b/test/performance/routines/level1/xswap.cc index 52fdc580..112641d3 100644 --- a/test/performance/routines/level1/xswap.cc +++ b/test/performance/routines/level1/xswap.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level2/xgbmv.cc b/test/performance/routines/level2/xgbmv.cc index 629e2182..b050184d 100644 --- a/test/performance/routines/level2/xgbmv.cc +++ b/test/performance/routines/level2/xgbmv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the 
clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level2/xgemv.cc b/test/performance/routines/level2/xgemv.cc index 2a1983de..51ab9a10 100644 --- a/test/performance/routines/level2/xgemv.cc +++ b/test/performance/routines/level2/xgemv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level2/xger.cc b/test/performance/routines/level2/xger.cc index 5fb0d91d..2d956346 100644 --- a/test/performance/routines/level2/xger.cc +++ b/test/performance/routines/level2/xger.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level2/xgerc.cc b/test/performance/routines/level2/xgerc.cc index fd511e42..acd0fab7 100644 --- a/test/performance/routines/level2/xgerc.cc +++ b/test/performance/routines/level2/xgerc.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast 
namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git a/test/performance/routines/level2/xgeru.cc b/test/performance/routines/level2/xgeru.cc index 689ab2b1..a5973777 100644 --- a/test/performance/routines/level2/xgeru.cc +++ b/test/performance/routines/level2/xgeru.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git a/test/performance/routines/level2/xhbmv.cc b/test/performance/routines/level2/xhbmv.cc index dabe6ec8..28b71045 100644 --- a/test/performance/routines/level2/xhbmv.cc +++ b/test/performance/routines/level2/xhbmv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git 
a/test/performance/routines/level2/xhemv.cc b/test/performance/routines/level2/xhemv.cc index 77447d76..622854a7 100644 --- a/test/performance/routines/level2/xhemv.cc +++ b/test/performance/routines/level2/xhemv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git a/test/performance/routines/level2/xher.cc b/test/performance/routines/level2/xher.cc index 4ef87e45..613d7766 100644 --- a/test/performance/routines/level2/xher.cc +++ b/test/performance/routines/level2/xher.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git a/test/performance/routines/level2/xher2.cc b/test/performance/routines/level2/xher2.cc index 2d7e17ab..c335d3be 100644 --- a/test/performance/routines/level2/xher2.cc +++ b/test/performance/routines/level2/xher2.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, 
clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git a/test/performance/routines/level2/xhpmv.cc b/test/performance/routines/level2/xhpmv.cc index b9dd3f82..1e726569 100644 --- a/test/performance/routines/level2/xhpmv.cc +++ b/test/performance/routines/level2/xhpmv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git a/test/performance/routines/level2/xhpr.cc b/test/performance/routines/level2/xhpr.cc index f596682c..000b69af 100644 --- a/test/performance/routines/level2/xhpr.cc +++ b/test/performance/routines/level2/xhpr.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git a/test/performance/routines/level2/xhpr2.cc b/test/performance/routines/level2/xhpr2.cc index 1c493226..19bafc46 100644 --- 
a/test/performance/routines/level2/xhpr2.cc +++ b/test/performance/routines/level2/xhpr2.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git a/test/performance/routines/level2/xsbmv.cc b/test/performance/routines/level2/xsbmv.cc index febc6bfd..eabab3b7 100644 --- a/test/performance/routines/level2/xsbmv.cc +++ b/test/performance/routines/level2/xsbmv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level2/xspmv.cc b/test/performance/routines/level2/xspmv.cc index 97c6b032..2a9ef925 100644 --- a/test/performance/routines/level2/xspmv.cc +++ b/test/performance/routines/level2/xspmv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git 
a/test/performance/routines/level2/xspr.cc b/test/performance/routines/level2/xspr.cc index cc18d9b6..84331d74 100644 --- a/test/performance/routines/level2/xspr.cc +++ b/test/performance/routines/level2/xspr.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level2/xspr2.cc b/test/performance/routines/level2/xspr2.cc index 768452be..c42009a1 100644 --- a/test/performance/routines/level2/xspr2.cc +++ b/test/performance/routines/level2/xspr2.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level2/xsymv.cc b/test/performance/routines/level2/xsymv.cc index 6748026f..3f72fe77 100644 --- a/test/performance/routines/level2/xsymv.cc +++ b/test/performance/routines/level2/xsymv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git 
a/test/performance/routines/level2/xsyr.cc b/test/performance/routines/level2/xsyr.cc index 84510e5d..6b31d3a9 100644 --- a/test/performance/routines/level2/xsyr.cc +++ b/test/performance/routines/level2/xsyr.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level2/xsyr2.cc b/test/performance/routines/level2/xsyr2.cc index b8c177d8..0ad59d2d 100644 --- a/test/performance/routines/level2/xsyr2.cc +++ b/test/performance/routines/level2/xsyr2.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level2/xtbmv.cc b/test/performance/routines/level2/xtbmv.cc index 1663dca0..a3297f34 100644 --- a/test/performance/routines/level2/xtbmv.cc +++ b/test/performance/routines/level2/xtbmv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git 
a/test/performance/routines/level2/xtbsv.cc b/test/performance/routines/level2/xtbsv.cc index e0cb9f2e..4dcd9a06 100644 --- a/test/performance/routines/level2/xtbsv.cc +++ b/test/performance/routines/level2/xtbsv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level2/xtpmv.cc b/test/performance/routines/level2/xtpmv.cc index 407fdc8c..72477f2d 100644 --- a/test/performance/routines/level2/xtpmv.cc +++ b/test/performance/routines/level2/xtpmv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level2/xtpsv.cc b/test/performance/routines/level2/xtpsv.cc index e402dc60..a3e3f7f1 100644 --- a/test/performance/routines/level2/xtpsv.cc +++ b/test/performance/routines/level2/xtpsv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git 
a/test/performance/routines/level2/xtrmv.cc b/test/performance/routines/level2/xtrmv.cc index c5563240..894a7952 100644 --- a/test/performance/routines/level2/xtrmv.cc +++ b/test/performance/routines/level2/xtrmv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level2/xtrsv.cc b/test/performance/routines/level2/xtrsv.cc index 136e2108..e8c65b0f 100644 --- a/test/performance/routines/level2/xtrsv.cc +++ b/test/performance/routines/level2/xtrsv.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level3/xgemm.cc b/test/performance/routines/level3/xgemm.cc index 2082ceac..91897ee1 100644 --- a/test/performance/routines/level3/xgemm.cc +++ b/test/performance/routines/level3/xgemm.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git 
a/test/performance/routines/level3/xhemm.cc b/test/performance/routines/level3/xhemm.cc index cc68e937..87650b9e 100644 --- a/test/performance/routines/level3/xhemm.cc +++ b/test/performance/routines/level3/xhemm.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git a/test/performance/routines/level3/xher2k.cc b/test/performance/routines/level3/xher2k.cc index 70d76bed..06894816 100644 --- a/test/performance/routines/level3/xher2k.cc +++ b/test/performance/routines/level3/xher2k.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git a/test/performance/routines/level3/xherk.cc b/test/performance/routines/level3/xherk.cc index b3b5dddf..d6f38fb2 100644 --- a/test/performance/routines/level3/xherk.cc +++ b/test/performance/routines/level3/xherk.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, 
clblast::Precision::kComplexSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); diff --git a/test/performance/routines/level3/xsymm.cc b/test/performance/routines/level3/xsymm.cc index f2292273..e0feadd1 100644 --- a/test/performance/routines/level3/xsymm.cc +++ b/test/performance/routines/level3/xsymm.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level3/xsyr2k.cc b/test/performance/routines/level3/xsyr2k.cc index 0c8f8f7c..4a82ddc4 100644 --- a/test/performance/routines/level3/xsyr2k.cc +++ b/test/performance/routines/level3/xsyr2k.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level3/xsyrk.cc b/test/performance/routines/level3/xsyrk.cc index ccd4511a..70f61322 100644 --- a/test/performance/routines/level3/xsyrk.cc +++ b/test/performance/routines/level3/xsyrk.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char 
*argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level3/xtrmm.cc b/test/performance/routines/level3/xtrmm.cc index 8278d077..6f6041e4 100644 --- a/test/performance/routines/level3/xtrmm.cc +++ b/test/performance/routines/level3/xtrmm.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; diff --git a/test/performance/routines/level3/xtrsm.cc b/test/performance/routines/level3/xtrsm.cc index 45f71c5e..76ef255a 100644 --- a/test/performance/routines/level3/xtrsm.cc +++ b/test/performance/routines/level3/xtrsm.cc @@ -18,7 +18,7 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { - switch(clblast::GetPrecision(argc, argv)) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; From cef78c7356102d7e1f4341f4670536f56a1ec6b7 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 28 Feb 2016 14:14:50 +0100 Subject: [PATCH 33/50] Fixed a compilation issue under AppleClang --- include/internal/database.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/include/internal/database.h 
b/include/internal/database.h index f26e354c..ca79fdad 100644 --- a/include/internal/database.h +++ b/include/internal/database.h @@ -57,16 +57,13 @@ class Database { // The OpenCL device vendors static constexpr auto kDeviceVendorAll = "default"; - static constexpr auto kDeviceVendorIntel = "Intel"; - static constexpr auto kDeviceVendorAMD = "AMD"; - static constexpr auto kDeviceVendorNVIDIA = "NVIDIA"; - // Alternative names for the above vendors + // Alternative names for some OpenCL vendors const std::unordered_map kVendorNames { - {"Intel(R) Corporation", kDeviceVendorIntel}, - {"GenuineIntel", kDeviceVendorIntel}, - {"Advanced Micro Devices, Inc.", kDeviceVendorAMD}, - {"NVIDIA Corporation", kDeviceVendorNVIDIA}, + {"Intel(R) Corporation", "Intel"}, + {"GenuineIntel", "Intel"}, + {"Advanced Micro Devices, Inc.", "AMD"}, + {"NVIDIA Corporation", "NVIDIA"}, }; // The database consists of separate database entries, stored together in a vector From e3545215a54c096e1c889124a9076cfb2f42df17 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 28 Feb 2016 14:16:48 +0100 Subject: [PATCH 34/50] Added support for xHER, xHPR, xSYR, and xSPR routines --- CMakeLists.txt | 2 +- README.md | 8 +- include/internal/routines/level2/xher.h | 61 ++++++++++++ include/internal/routines/level2/xhpr.h | 45 +++++++++ include/internal/routines/level2/xspr.h | 45 +++++++++ include/internal/routines/level2/xsyr.h | 45 +++++++++ scripts/generator/generator.py | 10 +- src/clblast.cc | 96 +++++++++++++------ src/kernels/level2/level2.opencl | 94 ++++++++++++++++++ src/kernels/level2/xger.opencl | 75 +++------------ src/kernels/level2/xher.opencl | 73 ++++++++++++++ src/routines/level2/xger.cc | 1 + src/routines/level2/xher.cc | 119 +++++++++++++++++++++++ src/routines/level2/xhpr.cc | 50 ++++++++++ src/routines/level2/xspr.cc | 50 ++++++++++ src/routines/level2/xsyr.cc | 50 ++++++++++ src/tuning/xger.cc | 1 + test/routines/level2/xher.h | 122 ++++++++++++++++++++++++ 
test/routines/level2/xhpr.h | 122 ++++++++++++++++++++++++ test/routines/level2/xspr.h | 122 ++++++++++++++++++++++++ test/routines/level2/xsyr.h | 122 ++++++++++++++++++++++++ 21 files changed, 1213 insertions(+), 100 deletions(-) create mode 100644 include/internal/routines/level2/xher.h create mode 100644 include/internal/routines/level2/xhpr.h create mode 100644 include/internal/routines/level2/xspr.h create mode 100644 include/internal/routines/level2/xsyr.h create mode 100644 src/kernels/level2/level2.opencl create mode 100644 src/kernels/level2/xher.opencl create mode 100644 src/routines/level2/xher.cc create mode 100644 src/routines/level2/xhpr.cc create mode 100644 src/routines/level2/xspr.cc create mode 100644 src/routines/level2/xsyr.cc create mode 100644 test/routines/level2/xher.h create mode 100644 test/routines/level2/xhpr.h create mode 100644 test/routines/level2/xspr.h create mode 100644 test/routines/level2/xsyr.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 342b72e5..e35fda7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,7 +112,7 @@ set(SAMPLE_PROGRAMS_CPP sgemm) set(SAMPLE_PROGRAMS_C sgemm) set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc) set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv - xger xgeru xgerc) + xger xgeru xgerc xher xhpr xsyr xspr) set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm) set(ROUTINES ${LEVEL1_ROUTINES} ${LEVEL2_ROUTINES} ${LEVEL3_ROUTINES}) set(PRECISIONS 32 64 3232 6464) diff --git a/README.md b/README.md index ae228836..1d6e0bad 100644 --- a/README.md +++ b/README.md @@ -194,12 +194,12 @@ CLBlast is in active development but already supports almost all the BLAS routin | xGER | ✔ | ✔ | - | - | | | xGERU | - | - | ✔ | ✔ | | | xGERC | - | - | ✔ | ✔ | | -| xHER | - | - | | | | -| xHPR | - | - | | | | +| xHER | - | - | ✔ | ✔ | | +| xHPR | - | - | ✔ | ✔ | | | xHER2 | - | - | | | | | xHPR2 | - | - | | | | -| xSYR | | | - | - | | -| xSPR | | 
| - | - | | +| xSYR | ✔ | ✔ | - | - | | +| xSPR | ✔ | ✔ | - | - | | | xSYR2 | | | - | - | | | xSPR2 | | | - | - | | diff --git a/include/internal/routines/level2/xher.h b/include/internal/routines/level2/xher.h new file mode 100644 index 00000000..6322265b --- /dev/null +++ b/include/internal/routines/level2/xher.h @@ -0,0 +1,61 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xher routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHER_H_ +#define CLBLAST_ROUTINES_XHER_H_ + +#include "internal/routine.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class Xher: public Routine { + public: + + // Members and methods from the base class + using Routine::db_; + using Routine::source_string_; + using Routine::queue_; + using Routine::GetProgramFromCache; + using Routine::TestVectorX; + using Routine::TestMatrixA; + using Routine::TestMatrixAP; + using Routine::RunKernel; + using Routine::ErrorIn; + + // Constructor + Xher(Queue &queue, Event &event, const std::string &name = "HER"); + + // Translates alpha of type 'U' into type 'T' + T GetAlpha(const U alpha); + + // Templated-precision implementation of the routine + StatusCode DoHer(const Layout layout, const Triangle triangle, + const size_t n, + const U alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &a_buffer, 
const size_t a_offset, const size_t a_ld, + const bool packed = false); + + private: + // Static variable to get the precision + const static Precision precision_; +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHER_H_ +#endif diff --git a/include/internal/routines/level2/xhpr.h b/include/internal/routines/level2/xhpr.h new file mode 100644 index 00000000..a0c3cb92 --- /dev/null +++ b/include/internal/routines/level2/xhpr.h @@ -0,0 +1,45 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xhpr routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHPR_H_ +#define CLBLAST_ROUTINES_XHPR_H_ + +#include "internal/routines/level2/xher.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class Xhpr: public Xher { + public: + + // Uses the regular Xher routine + using Xher::DoHer; + + // Constructor + Xhpr(Queue &queue, Event &event, const std::string &name = "HPR"); + + // Templated-precision implementation of the routine + StatusCode DoHpr(const Layout layout, const Triangle triangle, + const size_t n, + const U alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &ap_buffer, const size_t ap_offset); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHPR_H_ +#endif diff --git a/include/internal/routines/level2/xspr.h b/include/internal/routines/level2/xspr.h new file mode 100644 index 00000000..5b01d2cb --- /dev/null +++ b/include/internal/routines/level2/xspr.h @@ -0,0 +1,45 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xspr routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSPR_H_ +#define CLBLAST_ROUTINES_XSPR_H_ + +#include "internal/routines/level2/xher.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class Xspr: public Xher { + public: + + // Uses the regular Xher routine + using Xher::DoHer; + + // Constructor + Xspr(Queue &queue, Event &event, const std::string &name = "SPR"); + + // Templated-precision implementation of the routine + StatusCode DoSpr(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &ap_buffer, const size_t ap_offset); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSPR_H_ +#endif diff --git a/include/internal/routines/level2/xsyr.h b/include/internal/routines/level2/xsyr.h new file mode 100644 index 00000000..9704a881 --- /dev/null +++ b/include/internal/routines/level2/xsyr.h @@ -0,0 +1,45 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xsyr routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSYR_H_ +#define CLBLAST_ROUTINES_XSYR_H_ + +#include "internal/routines/level2/xher.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class Xsyr: public Xher { + public: + + // Uses the regular Xher routine + using Xher::DoHer; + + // Constructor + Xsyr(Queue &queue, Event &event, const std::string &name = "SYR"); + + // Templated-precision implementation of the routine + StatusCode DoSyr(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSYR_H_ +#endif diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 382d728a..0f5fbfa7 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -81,12 +81,12 @@ routines = [ Routine(True, "2b", "ger", T, [S,D], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 matrix update"), Routine(True, "2b", "geru", T, [C,Z], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 complex matrix update"), Routine(True, "2b", "gerc", T, [C,Z], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 complex conjugated matrix update"), - Routine(False, "2b", "her", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Hermitian rank-1 matrix update"), - Routine(False, "2b", "hpr", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["ap"], ["alpha"], False, "Hermitian packed rank-1 matrix update"), + 
Routine(True, "2b", "her", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Hermitian rank-1 matrix update"), + Routine(True, "2b", "hpr", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["ap"], ["alpha"], False, "Hermitian packed rank-1 matrix update"), Routine(False, "2b", "her2", T, [C,Z], ["n"], ["layout","triangle"], ["x","y"], ["a"], ["alpha"], False, "Hermitian rank-2 matrix update"), Routine(False, "2b", "hpr2", T, [C,Z], ["n"], ["layout","triangle"], ["x","y"], ["ap"], ["alpha"], False, "Hermitian packed rank-2 matrix update"), - Routine(False, "2b", "syr", T, [S,D], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Symmetric rank-1 matrix update"), - Routine(False, "2b", "spr", T, [S,D], ["n"], ["layout","triangle"], ["x"], ["ap"], ["alpha"], False, "Symmetric packed rank-1 matrix update"), + Routine(True, "2b", "syr", T, [S,D], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Symmetric rank-1 matrix update"), + Routine(True, "2b", "spr", T, [S,D], ["n"], ["layout","triangle"], ["x"], ["ap"], ["alpha"], False, "Symmetric packed rank-1 matrix update"), Routine(False, "2b", "syr2", T, [S,D], ["n"], ["layout","triangle"], ["x","y"], ["a"], ["alpha"], False, "Symmetric rank-2 matrix update"), Routine(False, "2b", "spr2", T, [S,D], ["n"], ["layout","triangle"], ["x","y"], ["ap"], ["alpha"], False, "Symmetric packed rank-2 matrix update"), ], @@ -247,7 +247,7 @@ files = [ path_clblast+"/src/clblast_c.cc", path_clblast+"/test/wrapper_clblas.h", ] -header_lines = [84, 55, 80, 24, 22] +header_lines = [84, 59, 80, 24, 22] footer_lines = [6, 3, 5, 2, 6] # Checks whether the command-line arguments are valid; exists otherwise diff --git a/src/clblast.cc b/src/clblast.cc index 47f2c59d..466de83e 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -41,6 +41,10 @@ #include "internal/routines/level2/xger.h" #include "internal/routines/level2/xgeru.h" #include "internal/routines/level2/xgerc.h" +#include 
"internal/routines/level2/xher.h" +#include "internal/routines/level2/xhpr.h" +#include "internal/routines/level2/xsyr.h" +#include "internal/routines/level2/xspr.h" // BLAS level-3 includes #include "internal/routines/level3/xgemm.h" @@ -946,13 +950,22 @@ template StatusCode Gerc(const Layout, // Hermitian rank-1 matrix update: CHER/ZHER template -StatusCode Her(const Layout, const Triangle, - const size_t, - const T, - const cl_mem, const size_t, const size_t, - cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*) { - return StatusCode::kNotImplemented; +StatusCode Her(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = Queue(*queue); + auto event_cpp = Event(*event); + auto routine = Xher,T>(queue_cpp, event_cpp); + auto status = routine.SetUp(); + if (status != StatusCode::kSuccess) { return status; } + return routine.DoHer(layout, triangle, + n, + alpha, + Buffer>(x_buffer), x_offset, x_inc, + Buffer>(a_buffer), a_offset, a_ld); } template StatusCode Her(const Layout, const Triangle, const size_t, @@ -969,13 +982,22 @@ template StatusCode Her(const Layout, const Triangle, // Hermitian packed rank-1 matrix update: CHPR/ZHPR template -StatusCode Hpr(const Layout, const Triangle, - const size_t, - const T, - const cl_mem, const size_t, const size_t, - cl_mem, const size_t, - cl_command_queue*, cl_event*) { - return StatusCode::kNotImplemented; +StatusCode Hpr(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = Queue(*queue); + auto event_cpp = Event(*event); + auto routine = Xhpr,T>(queue_cpp, event_cpp); + auto status = 
routine.SetUp(); + if (status != StatusCode::kSuccess) { return status; } + return routine.DoHpr(layout, triangle, + n, + alpha, + Buffer>(x_buffer), x_offset, x_inc, + Buffer>(ap_buffer), ap_offset); } template StatusCode Hpr(const Layout, const Triangle, const size_t, @@ -1044,13 +1066,22 @@ template StatusCode Hpr2(const Layout, const Triangle, // Symmetric rank-1 matrix update: SSYR/DSYR template -StatusCode Syr(const Layout, const Triangle, - const size_t, - const T, - const cl_mem, const size_t, const size_t, - cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*) { - return StatusCode::kNotImplemented; +StatusCode Syr(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = Queue(*queue); + auto event_cpp = Event(*event); + auto routine = Xsyr(queue_cpp, event_cpp); + auto status = routine.SetUp(); + if (status != StatusCode::kSuccess) { return status; } + return routine.DoSyr(layout, triangle, + n, + alpha, + Buffer(x_buffer), x_offset, x_inc, + Buffer(a_buffer), a_offset, a_ld); } template StatusCode Syr(const Layout, const Triangle, const size_t, @@ -1067,13 +1098,22 @@ template StatusCode Syr(const Layout, const Triangle, // Symmetric packed rank-1 matrix update: SSPR/DSPR template -StatusCode Spr(const Layout, const Triangle, - const size_t, - const T, - const cl_mem, const size_t, const size_t, - cl_mem, const size_t, - cl_command_queue*, cl_event*) { - return StatusCode::kNotImplemented; +StatusCode Spr(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = Queue(*queue); + auto event_cpp = Event(*event); + auto routine = 
Xspr(queue_cpp, event_cpp); + auto status = routine.SetUp(); + if (status != StatusCode::kSuccess) { return status; } + return routine.DoSpr(layout, triangle, + n, + alpha, + Buffer(x_buffer), x_offset, x_inc, + Buffer(ap_buffer), ap_offset); } template StatusCode Spr(const Layout, const Triangle, const size_t, diff --git a/src/kernels/level2/level2.opencl b/src/kernels/level2/level2.opencl new file mode 100644 index 00000000..ea7d7afd --- /dev/null +++ b/src/kernels/level2/level2.opencl @@ -0,0 +1,94 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file contains common functions for matrix update kernels (Xger, Xher). +// +// ================================================================================================= + +// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string +// literal). Comment-out this line for syntax-highlighting when developing. +R"( + +// ================================================================================================= + +// Parameters set by the tuner or by the database. Here they are given a basic default value in case +// this kernel file is used outside of the CLBlast library. 
+ +#ifndef WGS1 + #define WGS1 8 // The local work-group size in first dimension +#endif +#ifndef WGS2 + #define WGS2 8 // The local work-group size in second dimension +#endif +#ifndef WPT + #define WPT 1 // The amount of work-per-thread in both dimensions +#endif + +// ================================================================================================= + +// Returns an element from a vector +inline real LoadVector(const int id, const int max, + __global real* restrict gm, const int offset, const int inc, + const int do_conjugate) { + if (id < max) { + real result = gm[id*inc + offset]; + if (do_conjugate) { + #if defined(ROUTINE_GERC) + COMPLEX_CONJUGATE(result); + #endif + #if defined(ROUTINE_HER) + COMPLEX_CONJUGATE(result); + #endif + } + return result; + } + else { + real default_result; + SetToZero(default_result); + return default_result; + } +} + +// Performs the rank-1 matrix update +inline void MatrixUpdate(const int id1, const int id2, const int max1, const int max2, + __global real* restrict agm, const int a_offset, const int a_ld, + const real alpha, const real xvalue, const real yvalue) { + + // Bounds of a regular matrix + if (id1 < max1 && id2 < max2) { + + #if defined(ROUTINE_SPR) || defined(ROUTINE_HPR) + const int a_index = (id1 <= id2) ? 
((id2+1)*id2)/2 + id1 + a_offset : ((id1+1)*id1)/2 + id2 + a_offset; + #else + const int a_index = id2*a_ld + id1 + a_offset; + #endif + + // Loads the current value of the A matrix + const real avalue = agm[a_index]; + + // Computes result = alpha * x[i] * y[j] + a[i][j] + real result; + GER(result, alpha, xvalue, yvalue, avalue); + + // For hermetian matrices + #if defined(ROUTINE_HER) || defined(ROUTINE_HPR) + if (id1 == id2) { result.y = ZERO; } + #endif + + // Stores the final result + agm[a_index] = result; + } +} + +// ================================================================================================= + +// End of the C++11 raw string literal +)" + +// ================================================================================================= diff --git a/src/kernels/level2/xger.opencl b/src/kernels/level2/xger.opencl index c1cddea1..ce8e04bb 100644 --- a/src/kernels/level2/xger.opencl +++ b/src/kernels/level2/xger.opencl @@ -7,7 +7,7 @@ // Author(s): // Cedric Nugteren // -// This file contains the Xger kernel (generic version) for rank-1 matrix update. +// This file contains the Xger kernels for rank-1 matrix update. // // ================================================================================================= @@ -17,24 +17,9 @@ R"( // ================================================================================================= -// Parameters set by the tuner or by the database. Here they are given a basic default value in case -// this kernel file is used outside of the CLBlast library. 
- -#ifndef WGS1 - #define WGS1 8 // The local work-group size in first dimension -#endif -#ifndef WGS2 - #define WGS2 8 // The local work-group size in second dimension -#endif -#ifndef WPT - #define WPT 1 // The amount of work-per-thread in both dimensions -#endif - -// ================================================================================================= - -// Row-major version of the kernel +// Regular version of the rank-1 matrix update kernel (GER, GERU, GERC) __attribute__((reqd_work_group_size(WGS1, WGS2, 1))) -__kernel void Xger(const int max_one, const int max_two, const real alpha, +__kernel void Xger(const int max1, const int max2, const real alpha, const __global real* restrict xgm, const int x_offset, const int x_inc, const __global real* ygm, const int y_offset, const int y_inc, __global real* restrict agm, const int a_offset, const int a_ld, @@ -51,21 +36,14 @@ __kernel void Xger(const int max_one, const int max_two, const real alpha, #pragma unroll for (int w=0; w +// +// This file contains the Xher kernels for rank-1 matrix update. +// +// ================================================================================================= + +// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string +// literal). Comment-out this line for syntax-highlighting when developing. 
+R"( + +// ================================================================================================= + +// Symmetric version of the rank-1 matrix update kernel (HER, HPR, SYR, SPR) +__attribute__((reqd_work_group_size(WGS1, WGS2, 1))) +__kernel void Xher(const int n, const real alpha, + const __global real* restrict xgm, const int x_offset, const int x_inc, + __global real* restrict agm, const int a_offset, const int a_ld, + const int is_upper, const int is_rowmajor) { + + // Register storage for X and XT + real xvalues[WPT]; + real xtvalues[WPT]; + + // Loads the X-vector + #pragma unroll + for (int w=0; w id2)) || (!is_upper && (id2 > id1))) { + // Do nothing + } + + // Loads A, performs the operation, and stores the result into A + else { + MatrixUpdate(id1, id2, n, n, agm, a_offset, a_ld, alpha, xvalues[w2], xtvalues[w1]); + } + } + } +} + +// ================================================================================================= + +// End of the C++11 raw string literal +)" + +// ================================================================================================= diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc index 55fa26d4..0953c8bb 100644 --- a/src/routines/level2/xger.cc +++ b/src/routines/level2/xger.cc @@ -32,6 +32,7 @@ template Xger::Xger(Queue &queue, Event &event, const std::string &name): Routine(queue, event, name, {"Xger"}, precision_) { source_string_ = + #include "../../kernels/level2/level2.opencl" #include "../../kernels/level2/xger.opencl" ; } diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc new file mode 100644 index 00000000..4227252e --- /dev/null +++ b/src/routines/level2/xher.cc @@ -0,0 +1,119 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xher class (see the header for information about the class). +// +// ================================================================================================= + +#include "internal/routines/level2/xher.h" + +#include + +namespace clblast { +// ================================================================================================= + +// Specific implementations to get the memory-type based on a template argument +template <> const Precision Xher::precision_ = Precision::kSingle; +template <> const Precision Xher::precision_ = Precision::kDouble; +template <> const Precision Xher::precision_ = Precision::kComplexSingle; +template <> const Precision Xher::precision_ = Precision::kComplexDouble; + +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xher::Xher(Queue &queue, Event &event, const std::string &name): + Routine(queue, event, name, {"Xger"}, precision_) { + source_string_ = + #include "../../kernels/level2/level2.opencl" + #include "../../kernels/level2/xher.opencl" + ; +} + +// ================================================================================================= + +// Specializations to compute alpha of type 'T' +template <> float2 Xher::GetAlpha(const float alpha) { return float2{alpha, 0.0f}; } +template <> double2 Xher::GetAlpha(const double alpha) { return double2{alpha, 0.0}; } +template <> float Xher::GetAlpha(const float alpha) { return alpha; } +template <> double Xher::GetAlpha(const double alpha) { return alpha; } + +// ================================================================================================= + +// The main routine +template +StatusCode Xher::DoHer(const Layout layout, 
const Triangle triangle, + const size_t n, + const U alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const bool packed) { + + // Makes sure the dimensions are larger than zero + if (n == 0) { return StatusCode::kInvalidDimension; } + + // The data is either in the upper or lower triangle + const auto is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + const auto is_rowmajor = (layout == Layout::kRowMajor); + + // Creates a matching version of alpha + const auto matching_alpha = GetAlpha(alpha); + + // Tests the matrix and the vectors for validity + auto status = StatusCode::kSuccess; + if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); } + else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld, sizeof(T)); } + if (ErrorIn(status)) { return status; } + status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + if (ErrorIn(status)) { return status; } + + // Retrieves the Xgemv kernel from the compiled binary + try { + auto& program = GetProgramFromCache(); + auto kernel = Kernel(program, "Xher"); + + // Sets the kernel arguments + kernel.SetArgument(0, static_cast(n)); + kernel.SetArgument(1, matching_alpha); + kernel.SetArgument(2, x_buffer()); + kernel.SetArgument(3, static_cast(x_offset)); + kernel.SetArgument(4, static_cast(x_inc)); + kernel.SetArgument(5, a_buffer()); + kernel.SetArgument(6, static_cast(a_offset)); + kernel.SetArgument(7, static_cast(a_ld)); + kernel.SetArgument(8, static_cast(is_upper)); + kernel.SetArgument(9, static_cast(is_rowmajor)); + + // Launches the kernel + auto global_one = CeilDiv(Ceil(n, db_["WGS1"]), db_["WPT"]); + auto global_two = CeilDiv(Ceil(n, db_["WGS2"]), db_["WPT"]); + auto global = std::vector{global_one, global_two}; + auto local = std::vector{db_["WGS1"], db_["WGS2"]}; + status = RunKernel(kernel, 
global, local); + if (ErrorIn(status)) { return status; } + + // Waits for all kernels to finish + queue_.Finish(); + + // Succesfully finished the computation + return StatusCode::kSuccess; + } catch (...) { return StatusCode::kInvalidKernel; } +} + +// ================================================================================================= + +// Compiles the templated class +template class Xher; +template class Xher; +template class Xher; +template class Xher; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xhpr.cc b/src/routines/level2/xhpr.cc new file mode 100644 index 00000000..abe00669 --- /dev/null +++ b/src/routines/level2/xhpr.cc @@ -0,0 +1,50 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xhpr class (see the header for information about the class). 
+// +// ================================================================================================= + +#include "internal/routines/level2/xhpr.h" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xhpr::Xhpr(Queue &queue, Event &event, const std::string &name): + Xher(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xhpr::DoHpr(const Layout layout, const Triangle triangle, + const size_t n, + const U alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &ap_buffer, const size_t ap_offset) { + + // + return DoHer(layout, triangle, n, alpha, + x_buffer, x_offset, x_inc, + ap_buffer, ap_offset, n, true); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xhpr; +template class Xhpr; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xspr.cc b/src/routines/level2/xspr.cc new file mode 100644 index 00000000..5159ad50 --- /dev/null +++ b/src/routines/level2/xspr.cc @@ -0,0 +1,50 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xspr class (see the header for information about the class). 
+// +// ================================================================================================= + +#include "internal/routines/level2/xspr.h" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xspr::Xspr(Queue &queue, Event &event, const std::string &name): + Xher(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xspr::DoSpr(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &ap_buffer, const size_t ap_offset) { + + // + return DoHer(layout, triangle, n, alpha, + x_buffer, x_offset, x_inc, + ap_buffer, ap_offset, n, true); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xspr; +template class Xspr; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xsyr.cc b/src/routines/level2/xsyr.cc new file mode 100644 index 00000000..755fde0d --- /dev/null +++ b/src/routines/level2/xsyr.cc @@ -0,0 +1,50 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xsyr class (see the header for information about the class). 
+// +// ================================================================================================= + +#include "internal/routines/level2/xsyr.h" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xsyr::Xsyr(Queue &queue, Event &event, const std::string &name): + Xher(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xsyr::DoSyr(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { + + // + return DoHer(layout, triangle, n, alpha, + x_buffer, x_offset, x_inc, + a_buffer, a_offset, a_ld); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xsyr; +template class Xsyr; + +// ================================================================================================= +} // namespace clblast diff --git a/src/tuning/xger.cc b/src/tuning/xger.cc index a47f5a78..39efdb81 100644 --- a/src/tuning/xger.cc +++ b/src/tuning/xger.cc @@ -31,6 +31,7 @@ class TuneXger { static std::string GetSources() { return #include "../src/kernels/common.opencl" + #include "../src/kernels/level2/level2.opencl" #include "../src/kernels/level2/xger.opencl" ; } diff --git a/test/routines/level2/xher.h b/test/routines/level2/xher.h new file mode 100644 index 00000000..53c4200f --- /dev/null +++ b/test/routines/level2/xher.h @@ -0,0 +1,122 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements a class with static methods to describe the Xher routine. Examples of +// such 'descriptions' are how to calculate the size a of buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XHER_H_ +#define CLBLAST_TEST_ROUTINES_XHER_H_ + +#include +#include + +#include "wrapper_clblas.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class TestXher { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 2; } + + // The list of arguments relevant for this routine + static std::vector GetOptions() { + return {kArgN, + kArgLayout, kArgTriangle, + kArgALeadDim, kArgXInc, + kArgAOffset, kArgXOffset, + kArgAlpha}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments &args) { + return args.n * args.x_inc + args.x_offset; + } + static size_t GetSizeA(const Arguments &args) { + return args.n * args.a_ld + args.a_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments &args) { + args.a_size = GetSizeA(args); + args.x_size = GetSizeX(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments &args) { return args.n; } + static size_t DefaultLDB(const Arguments &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments &) { return 1; } // N/A for this routine + + // Describes which transpose options 
are relevant for this routine + using Transposes = std::vector; + static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Her(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + clWaitForEvents(1, &event); + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXher(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + + // Describes how to download the results of the computation (more importantly: which buffer) + static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector result(args.a_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments &args) { return args.n; } + static size_t ResultID2(const Arguments &args) { return args.n; } + static size_t GetResultIndex(const Arguments &args, const size_t id1, const size_t id2) { + return id2*args.a_ld + id1 + args.a_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments &args) { + return 3 * 
args.n * args.n; + } + static size_t GetBytes(const Arguments &args) { + return (args.n*args.n + args.n) * sizeof(T); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XHER_H_ +#endif diff --git a/test/routines/level2/xhpr.h b/test/routines/level2/xhpr.h new file mode 100644 index 00000000..03599ddc --- /dev/null +++ b/test/routines/level2/xhpr.h @@ -0,0 +1,122 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements a class with static methods to describe the Xhpr routine. Examples of +// such 'descriptions' are how to calculate the size a of buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. 
+// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XHPR_H_ +#define CLBLAST_TEST_ROUTINES_XHPR_H_ + +#include +#include + +#include "wrapper_clblas.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class TestXhpr { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 2; } + + // The list of arguments relevant for this routine + static std::vector GetOptions() { + return {kArgN, + kArgLayout, kArgTriangle, + kArgXInc, + kArgAPOffset, kArgXOffset, + kArgAlpha}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments &args) { + return args.n * args.x_inc + args.x_offset; + } + static size_t GetSizeAP(const Arguments &args) { + return ((args.n*(args.n+1)) / 2) + args.ap_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments &args) { + args.ap_size = GetSizeAP(args); + args.x_size = GetSizeX(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments &args) { return args.n; } + static size_t DefaultLDB(const Arguments &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector; + static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status 
= Hpr(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.ap_mat(), args.ap_offset, + &queue_plain, &event); + clWaitForEvents(1, &event); + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXhpr(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.ap_mat(), args.ap_offset, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + + // Describes how to download the results of the computation (more importantly: which buffer) + static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector result(args.ap_size, static_cast(0)); + buffers.ap_mat.Read(queue, args.ap_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments &args) { return args.ap_size - args.ap_offset; } + static size_t ResultID2(const Arguments &) { return 1; } // N/A for this routine + static size_t GetResultIndex(const Arguments &args, const size_t id1, const size_t) { + return id1 + args.ap_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments &args) { + return 3 * ((args.n*(args.n+1)) / 2); + } + static size_t GetBytes(const Arguments &args) { + return ((args.n*(args.n+1)) + args.n) * sizeof(T); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XHPR_H_ +#endif diff --git a/test/routines/level2/xspr.h b/test/routines/level2/xspr.h new file mode 100644 index 00000000..819b1ca8 --- /dev/null +++ 
b/test/routines/level2/xspr.h @@ -0,0 +1,122 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements a class with static methods to describe the Xspr routine. Examples of +// such 'descriptions' are how to calculate the size a of buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XSPR_H_ +#define CLBLAST_TEST_ROUTINES_XSPR_H_ + +#include +#include + +#include "wrapper_clblas.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class TestXspr { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 2; } + + // The list of arguments relevant for this routine + static std::vector GetOptions() { + return {kArgN, + kArgLayout, kArgTriangle, + kArgXInc, + kArgAPOffset, kArgXOffset, + kArgAlpha}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments &args) { + return args.n * args.x_inc + args.x_offset; + } + static size_t GetSizeAP(const Arguments &args) { + return ((args.n*(args.n+1)) / 2) + args.ap_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments &args) { + args.ap_size = GetSizeAP(args); + args.x_size = GetSizeX(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const 
Arguments &args) { return args.n; } + static size_t DefaultLDB(const Arguments &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector; + static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Spr(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.ap_mat(), args.ap_offset, + &queue_plain, &event); + clWaitForEvents(1, &event); + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXspr(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.ap_mat(), args.ap_offset, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + + // Describes how to download the results of the computation (more importantly: which buffer) + static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector result(args.ap_size, static_cast(0)); + buffers.ap_mat.Read(queue, args.ap_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments &args) { return args.ap_size - args.ap_offset; } + static size_t ResultID2(const Arguments &) { return 1; } // N/A for 
this routine + static size_t GetResultIndex(const Arguments &args, const size_t id1, const size_t) { + return id1 + args.ap_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments &args) { + return 3 * ((args.n*(args.n+1)) / 2); + } + static size_t GetBytes(const Arguments &args) { + return ((args.n*(args.n+1)) + args.n) * sizeof(T); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XSPR_H_ +#endif diff --git a/test/routines/level2/xsyr.h b/test/routines/level2/xsyr.h new file mode 100644 index 00000000..66b75c0c --- /dev/null +++ b/test/routines/level2/xsyr.h @@ -0,0 +1,122 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements a class with static methods to describe the Xsyr routine. Examples of +// such 'descriptions' are how to calculate the size a of buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. 
+// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XSYR_H_ +#define CLBLAST_TEST_ROUTINES_XSYR_H_ + +#include +#include + +#include "wrapper_clblas.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class TestXsyr { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 2; } + + // The list of arguments relevant for this routine + static std::vector GetOptions() { + return {kArgN, + kArgLayout, kArgTriangle, + kArgALeadDim, kArgXInc, + kArgAOffset, kArgXOffset, + kArgAlpha}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments &args) { + return args.n * args.x_inc + args.x_offset; + } + static size_t GetSizeA(const Arguments &args) { + return args.n * args.a_ld + args.a_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments &args) { + args.a_size = GetSizeA(args); + args.x_size = GetSizeX(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments &args) { return args.n; } + static size_t DefaultLDB(const Arguments &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector; + static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto 
status = Syr(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + clWaitForEvents(1, &event); + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXsyr(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + + // Describes how to download the results of the computation (more importantly: which buffer) + static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector result(args.a_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments &args) { return args.n; } + static size_t ResultID2(const Arguments &args) { return args.n; } + static size_t GetResultIndex(const Arguments &args, const size_t id1, const size_t id2) { + return id2*args.a_ld + id1 + args.a_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments &args) { + return 3 * args.n * args.n; + } + static size_t GetBytes(const Arguments &args) { + return (args.n*args.n + args.n) * sizeof(T); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XSYR_H_ +#endif From 4a56822dcc7f723db0dc9a86fbb71abdd18cee31 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 28 Feb 2016 15:49:59 +0100 Subject: [PATCH 35/50] Fixed 
a couple of correctness bugs in the Xher kernels --- src/kernels/level2/level2.opencl | 18 +++++++++++++----- src/kernels/level2/xger.opencl | 4 ++-- src/kernels/level2/xher.opencl | 2 +- src/routines/level2/xher.cc | 3 +++ 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/kernels/level2/level2.opencl b/src/kernels/level2/level2.opencl index ea7d7afd..ad92595a 100644 --- a/src/kernels/level2/level2.opencl +++ b/src/kernels/level2/level2.opencl @@ -34,7 +34,7 @@ R"( // Returns an element from a vector inline real LoadVector(const int id, const int max, - __global real* restrict gm, const int offset, const int inc, + __global real* gm, const int offset, const int inc, const int do_conjugate) { if (id < max) { real result = gm[id*inc + offset]; @@ -42,7 +42,7 @@ inline real LoadVector(const int id, const int max, #if defined(ROUTINE_GERC) COMPLEX_CONJUGATE(result); #endif - #if defined(ROUTINE_HER) + #if defined(ROUTINE_HER) || defined(ROUTINE_HPR) COMPLEX_CONJUGATE(result); #endif } @@ -57,14 +57,22 @@ inline real LoadVector(const int id, const int max, // Performs the rank-1 matrix update inline void MatrixUpdate(const int id1, const int id2, const int max1, const int max2, - __global real* restrict agm, const int a_offset, const int a_ld, - const real alpha, const real xvalue, const real yvalue) { + __global real* agm, const int a_offset, const int a_ld, + const real alpha, const real xvalue, const real yvalue, + const int is_upper) { // Bounds of a regular matrix if (id1 < max1 && id2 < max2) { #if defined(ROUTINE_SPR) || defined(ROUTINE_HPR) - const int a_index = (id1 <= id2) ? ((id2+1)*id2)/2 + id1 + a_offset : ((id1+1)*id1)/2 + id2 + a_offset; + int a_index; + if (is_upper) { + a_index = (id1 <= id2) ? ((id2+1)*id2)/2 + id1 : ((id1+1)*id1)/2 + id2; + } + else { + a_index = (id1 >= id2) ? 
((2*a_ld-(id2+1))*id2)/2 + id1 : ((2*a_ld-(id1+1))*id1)/2 + id2; + } + a_index += a_offset; #else const int a_index = id2*a_ld + id1 + a_offset; #endif diff --git a/src/kernels/level2/xger.opencl b/src/kernels/level2/xger.opencl index ce8e04bb..d377fbb0 100644 --- a/src/kernels/level2/xger.opencl +++ b/src/kernels/level2/xger.opencl @@ -58,7 +58,7 @@ __kernel void Xger(const int max1, const int max2, const real alpha, // Loads A, performs the operation, and stores the result into A MatrixUpdate(id1, id2, max1, max2, agm, a_offset, a_ld, - alpha, xvalues[w2], yvalues[w1]); + alpha, xvalues[w2], yvalues[w1], false); } } } @@ -92,7 +92,7 @@ __kernel void Xger(const int max1, const int max2, const real alpha, // Loads A, performs the operation, and stores the result into A MatrixUpdate(id1, id2, max1, max2, agm, a_offset, a_ld, - alpha, xvalues[w1], yvalues[w2]); + alpha, xvalues[w1], yvalues[w2], false); } } } diff --git a/src/kernels/level2/xher.opencl b/src/kernels/level2/xher.opencl index 13bc4135..edb94ca8 100644 --- a/src/kernels/level2/xher.opencl +++ b/src/kernels/level2/xher.opencl @@ -59,7 +59,7 @@ __kernel void Xher(const int n, const real alpha, // Loads A, performs the operation, and stores the result into A else { - MatrixUpdate(id1, id2, n, n, agm, a_offset, a_ld, alpha, xvalues[w2], xtvalues[w1]); + MatrixUpdate(id1, id2, n, n, agm, a_offset, a_ld, alpha, xvalues[w2], xtvalues[w1], is_upper); } } } diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc index 4227252e..5eca44b0 100644 --- a/src/routines/level2/xher.cc +++ b/src/routines/level2/xher.cc @@ -74,6 +74,9 @@ StatusCode Xher::DoHer(const Layout layout, const Triangle triangle, status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); if (ErrorIn(status)) { return status; } + // If alpha is zero an update is not required + if (alpha == U{0}) { return StatusCode::kSuccess; } + // Retrieves the Xgemv kernel from the compiled binary try { auto& program = GetProgramFromCache(); 
From 3c27edb087ce12956cb526af9280276b49f5a692 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 28 Feb 2016 16:37:49 +0100 Subject: [PATCH 36/50] Updated the changelog with newly supported level-2 routines --- CHANGELOG | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index c6cfa174..72f53550 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,6 +3,14 @@ Development version (next release) - Added support for MSVC (Visual Studio) 2015 - Added tuned parameters for various devices (see README) - Now automatically generates C++ code from JSON tuning results +- Added level-2 routines: + * SGER/DGER + * CGERU/ZGERU + * CGERC/ZGERC + * CHER/ZHER + * CHPR/ZHPR + * CSYR/ZSYR + * CSPR/ZSPR Version 0.5.0 - Improved structure and performance of level-2 routines (xSYMV/xHEMV) From fa79720557412cad605589301580ccda39edce6c Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 28 Feb 2016 16:47:52 +0100 Subject: [PATCH 37/50] Added tuning results for Intel Iris Pro and AMD R9 M370X --- README.md | 2 ++ include/internal/database/copy.h | 12 +++++-- include/internal/database/pad.h | 16 ++++++--- include/internal/database/padtranspose.h | 8 ++++- include/internal/database/transpose.h | 12 +++++-- include/internal/database/xaxpy.h | 10 ++++-- include/internal/database/xdot.h | 20 +++++++---- include/internal/database/xgemm.h | 20 +++++++---- include/internal/database/xgemv.h | 16 ++++++--- include/internal/database/xger.h | 44 +++++++++++++++++++++--- scripts/database/database.py | 5 ++- 11 files changed, 125 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 1d6e0bad..3da0e262 100644 --- a/README.md +++ b/README.md @@ -93,8 +93,10 @@ The CLBlast library will be tuned in the future for the most commonly used OpenC - Tesla K40m * AMD GPUs: - Tahiti + - R9 M370X * Intel GPUs: - Iris + - Iris Pro * Intel CPUs: - Core i5-6200U - Core i7-3770K diff --git a/include/internal/database/copy.h b/include/internal/database/copy.h index 
9429857c..820e0f04 100644 --- a/include/internal/database/copy.h +++ b/include/internal/database/copy.h @@ -18,8 +18,9 @@ const Database::DatabaseEntry Database::CopySingle = { "Copy", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } }, { "Tahiti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, } }, { // Intel CPUs @@ -33,6 +34,7 @@ const Database::DatabaseEntry Database::CopySingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "Iris Pro", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } }, { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, } }, @@ -69,8 +71,9 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { "Copy", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Intel CPUs @@ -84,6 +87,7 @@ const Database::DatabaseEntry Database::CopyComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "Iris Pro", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",4} } }, { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, } }, @@ -118,8 +122,9 @@ const Database::DatabaseEntry 
Database::CopyDouble = { "Copy", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, - { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } }, + { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, { // Intel CPUs @@ -163,6 +168,7 @@ const Database::DatabaseEntry Database::CopyComplexDouble = { "Copy", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } diff --git a/include/internal/database/pad.h b/include/internal/database/pad.h index 373f9641..c08a60d6 100644 --- a/include/internal/database/pad.h +++ b/include/internal/database/pad.h @@ -18,8 +18,9 @@ const Database::DatabaseEntry Database::PadSingle = { "Pad", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, - { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Intel CPUs @@ -33,7 +34,8 @@ const Database::DatabaseEntry Database::PadSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "Iris Pro", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, 
{"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, } }, { // Intel accelerators @@ -69,8 +71,9 @@ const Database::DatabaseEntry Database::PadComplexSingle = { "Pad", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tahiti", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Intel CPUs @@ -84,7 +87,8 @@ const Database::DatabaseEntry Database::PadComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, - { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } }, + { "Iris Pro", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, } }, { // Intel accelerators @@ -120,6 +124,7 @@ const Database::DatabaseEntry Database::PadDouble = { "Pad", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } @@ -165,8 +170,9 @@ const Database::DatabaseEntry Database::PadComplexDouble = { "Pad", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, { "Tahiti", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, - { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, 
{"PAD_WPTX",1}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, { // Intel CPUs diff --git a/include/internal/database/padtranspose.h b/include/internal/database/padtranspose.h index 99d4f32a..5a89869f 100644 --- a/include/internal/database/padtranspose.h +++ b/include/internal/database/padtranspose.h @@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::PadtransposeSingle = { "Padtranspose", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, { "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, } @@ -33,6 +34,7 @@ const Database::DatabaseEntry Database::PadtransposeSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "Iris Pro", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, @@ -69,6 +71,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { "Padtranspose", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, { "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } @@ -84,6 +87,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "Iris Pro", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, @@ -120,8 +124,9 @@ const Database::DatabaseEntry Database::PadtransposeDouble 
= { "Padtranspose", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, { "Tahiti", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, - { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, { // Intel CPUs @@ -165,6 +170,7 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = { "Padtranspose", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, { "Tahiti", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, } diff --git a/include/internal/database/transpose.h b/include/internal/database/transpose.h index 8eee2e5d..1948e0d6 100644 --- a/include/internal/database/transpose.h +++ b/include/internal/database/transpose.h @@ -18,6 +18,7 @@ const Database::DatabaseEntry Database::TransposeSingle = { "Transpose", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } }, { "Tahiti", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, } @@ -33,6 +34,7 @@ const Database::DatabaseEntry Database::TransposeSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, + { "Iris Pro", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, { "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, } }, @@ -69,8 +71,9 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { "Transpose", 
Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, { "Tahiti", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, - { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, } }, { // Intel CPUs @@ -84,6 +87,7 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "Iris Pro", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, } }, @@ -114,8 +118,9 @@ const Database::DatabaseEntry Database::TransposeDouble = { "Transpose", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, { "Tahiti", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, - { "default", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, } }, { // Intel CPUs @@ -159,8 +164,9 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = { "Transpose", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } }, { "Tahiti", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, - { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, } }, { // Intel CPUs diff --git a/include/internal/database/xaxpy.h 
b/include/internal/database/xaxpy.h index bcaf80cd..491dfc68 100644 --- a/include/internal/database/xaxpy.h +++ b/include/internal/database/xaxpy.h @@ -18,8 +18,9 @@ const Database::DatabaseEntry Database::XaxpySingle = { "Xaxpy", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "Tahiti", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, - { "default", { {"VW",2}, {"WGS",64}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, { // Intel CPUs @@ -33,6 +34,7 @@ const Database::DatabaseEntry Database::XaxpySingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, + { "Iris Pro", { {"VW",1}, {"WGS",128}, {"WPT",2} } }, { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, @@ -69,6 +71,7 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { "Xaxpy", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"VW",2}, {"WGS",64}, {"WPT",8} } }, { "Tahiti", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } @@ -84,7 +87,8 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, - { "default", { {"VW",2}, {"WGS",128}, {"WPT",1} } }, + { "Iris Pro", { {"VW",1}, {"WGS",256}, {"WPT",8} } }, + { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, { // Intel accelerators @@ -120,6 +124,7 @@ const Database::DatabaseEntry Database::XaxpyDouble = { "Xaxpy", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, { "Tahiti", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } @@ -165,6 +170,7 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = { "Xaxpy", Precision::kComplexDouble, { 
{ // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "Tahiti", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, { "default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } diff --git a/include/internal/database/xdot.h b/include/internal/database/xdot.h index a80398d7..31d0f84b 100644 --- a/include/internal/database/xdot.h +++ b/include/internal/database/xdot.h @@ -18,8 +18,9 @@ const Database::DatabaseEntry Database::XdotSingle = { "Xdot", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"VW",1}, {"WGS1",64}, {"WGS2",128} } }, { "Tahiti", { {"VW",1}, {"WGS1",256}, {"WGS2",256} } }, - { "default", { {"VW",1}, {"WGS1",256}, {"WGS2",256} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",128} } }, } }, { // Intel CPUs @@ -33,7 +34,8 @@ const Database::DatabaseEntry Database::XdotSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"VW",1}, {"WGS1",512}, {"WGS2",32} } }, - { "default", { {"VW",1}, {"WGS1",512}, {"WGS2",32} } }, + { "Iris Pro", { {"VW",1}, {"WGS1",128}, {"WGS2",512} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",32} } }, } }, { // Intel accelerators @@ -69,8 +71,9 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { "Xdot", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"VW",1}, {"WGS1",64}, {"WGS2",32} } }, { "Tahiti", { {"VW",1}, {"WGS1",64}, {"WGS2",256} } }, - { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",256} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",32} } }, } }, { // Intel CPUs @@ -84,7 +87,8 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, - { "default", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, + { "Iris Pro", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, + { "default", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} 
} }, } }, { // Intel accelerators @@ -120,8 +124,9 @@ const Database::DatabaseEntry Database::XdotDouble = { "Xdot", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"VW",1}, {"WGS1",64}, {"WGS2",128} } }, { "Tahiti", { {"VW",1}, {"WGS1",64}, {"WGS2",256} } }, - { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",256} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",128} } }, } }, { // Intel CPUs @@ -165,8 +170,9 @@ const Database::DatabaseEntry Database::XdotComplexDouble = { "Xdot", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"VW",1}, {"WGS1",64}, {"WGS2",32} } }, { "Tahiti", { {"VW",1}, {"WGS1",64}, {"WGS2",256} } }, - { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",256} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",32} } }, } }, { // Intel CPUs @@ -198,7 +204,7 @@ const Database::DatabaseEntry Database::XdotComplexDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"VW",1}, {"WGS1",32}, {"WGS2",64} } }, + { "default", { {"VW",1}, {"WGS1",32}, {"WGS2",32} } }, } }, } diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h index 1197bc0a..833e2c7a 100644 --- a/include/internal/database/xgemm.h +++ b/include/internal/database/xgemm.h @@ -18,8 +18,9 @@ const Database::DatabaseEntry Database::XgemmSingle = { "Xgemm", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, { "Tahiti", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",1} } }, - { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, 
{"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",1} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",1} } }, } }, { // Intel CPUs @@ -33,7 +34,8 @@ const Database::DatabaseEntry Database::XgemmSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, - { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, + { "Iris Pro", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",4} } }, + { "default", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",1} } }, } }, { // Intel accelerators @@ -69,8 +71,9 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = { "Xgemm", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",8} } }, { "Tahiti", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",2}, {"VWN",1} } }, - { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",2}, {"VWN",1} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, 
{"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",8} } }, } }, { // Intel CPUs @@ -84,7 +87,8 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, - { "default", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "Iris Pro", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } }, } }, { // Intel accelerators @@ -108,7 +112,7 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } }, } }, } @@ -120,6 +124,7 @@ const Database::DatabaseEntry Database::XgemmDouble = { "Xgemm", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",8} } }, { "Tahiti", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, 
{"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",4} } }, { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",4} } }, } @@ -165,8 +170,9 @@ const Database::DatabaseEntry Database::XgemmComplexDouble = { "Xgemm", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } }, { "Tahiti", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, - { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } }, } }, { // Intel CPUs @@ -197,7 +203,7 @@ const Database::DatabaseEntry Database::XgemmComplexDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } }, } }, } diff --git a/include/internal/database/xgemv.h b/include/internal/database/xgemv.h index 37d859e2..c3ce3b20 100644 --- a/include/internal/database/xgemv.h +++ b/include/internal/database/xgemv.h @@ -18,8 +18,9 @@ const Database::DatabaseEntry 
Database::XgemvSingle = { "Xgemv", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, { "Tahiti", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",128}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, { // Intel CPUs @@ -32,7 +33,8 @@ const Database::DatabaseEntry Database::XgemvSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",8} } }, - { "default", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",8} } }, + { "Iris Pro", { {"WGS1",256}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",4} } }, + { "default", { {"WGS1",64}, {"WPT1",2}, {"VW2",1}, {"WGS2",128}, {"WPT2",2}, {"VW3",4}, {"WGS3",64}, {"WPT3",4} } }, } }, { // Intel accelerators @@ -68,6 +70,7 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = { "Xgemv", Precision::kComplexSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WPT1",1}, {"VW2",2}, {"WGS2",256}, {"WPT2",2}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, { "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",128}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } @@ -82,7 +85,8 @@ const Database::DatabaseEntry Database::XgemvComplexSingle = { { // Intel GPUs kDeviceTypeGPU, "Intel", { { "Iris", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, 
{"WPT3",1} } }, - { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "Iris Pro", { {"WGS1",64}, {"WPT1",1}, {"VW2",4}, {"WGS2",128}, {"WPT2",4}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, { // Intel accelerators @@ -114,8 +118,9 @@ const Database::DatabaseEntry Database::XgemvDouble = { "Xgemv", Precision::kDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, { "Tahiti", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, - { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",2}, {"WGS3",64}, {"WPT3",2} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, { // Intel CPUs @@ -158,8 +163,9 @@ const Database::DatabaseEntry Database::XgemvComplexDouble = { "Xgemv", Precision::kComplexDouble, { { // AMD GPUs kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",256}, {"WPT2",1}, {"VW3",1}, {"WGS3",128}, {"WPT3",1} } }, { "Tahiti", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, - { "default", { {"WGS1",256}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, + { "default", { {"WGS1",64}, {"WPT1",1}, {"VW2",1}, {"WGS2",64}, {"WPT2",1}, {"VW3",1}, {"WGS3",64}, {"WPT3",1} } }, } }, { // Intel CPUs diff --git a/include/internal/database/xger.h b/include/internal/database/xger.h index c9cfb6cd..1f7730b1 100644 --- a/include/internal/database/xger.h +++ b/include/internal/database/xger.h @@ -16,15 +16,27 @@ namespace clblast { const 
Database::DatabaseEntry Database::XgerSingle = { "Xger", Precision::kSingle, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } }, { "default", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris Pro", { {"WGS1",64}, {"WGS2",1}, {"WPT",4} } }, + { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",4} } }, + } + }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } }, + { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, } }, } @@ -34,15 +46,27 @@ const Database::DatabaseEntry Database::XgerSingle = { const Database::DatabaseEntry Database::XgerComplexSingle = { "Xger", Precision::kComplexSingle, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } }, + { "default", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, } }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Iris Pro", { {"WGS1",16}, {"WGS2",2}, {"WPT",4} } }, + { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",4} } }, + } + }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, + { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",1} } }, } }, } @@ -52,6 +76,12 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { const Database::DatabaseEntry Database::XgerDouble = { "Xger", Precision::kDouble, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, + { 
"default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } }, @@ -60,7 +90,7 @@ const Database::DatabaseEntry Database::XgerDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } }, + { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, } }, } @@ -70,6 +100,12 @@ const Database::DatabaseEntry Database::XgerDouble = { const Database::DatabaseEntry Database::XgerComplexDouble = { "Xger", Precision::kComplexDouble, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } }, @@ -78,7 +114,7 @@ const Database::DatabaseEntry Database::XgerComplexDouble = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, } }, } diff --git a/scripts/database/database.py b/scripts/database/database.py index 6f3ce85e..b3f919ef 100644 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -55,12 +55,11 @@ pd.set_option('display.width', 1000) # Downloads the database and save it to disk def DownloadDatabase(filename): - sys.stdout.write("## Downloading database from '"+DATABASE_SERVER_URL+"'...") + print("## Downloading database from '"+DATABASE_SERVER_URL+"'...") df = urlopen(DATABASE_SERVER_URL) output = open(file_db,'wb') output.write(df.read()) output.close() - print("done") # Loads the database from disk def LoadDatabase(filename): @@ -263,7 +262,7 @@ database = LoadDatabase(file_db) for file_json in glob.glob(glob_json): # Loads the newly imported data - sys.stdout.write("## Processing '"+file_json+"'") + 
sys.stdout.write("## Processing '"+file_json+"' ") imported_data = ImportDataFromFile(file_json) imported_data = SanitizeVendorNames(imported_data) From 60da54da5d8cb8dc763c13ba48ec6d8e557a609d Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Wed, 2 Mar 2016 21:18:01 +0100 Subject: [PATCH 38/50] Added preliminary support for xHER2 and xSYR2 routines --- CMakeLists.txt | 2 +- include/internal/routines/level2/xher2.h | 60 +++++++++++ include/internal/routines/level2/xsyr2.h | 46 ++++++++ scripts/generator/generator.py | 6 +- src/clblast.cc | 54 +++++++--- src/kernels/common.opencl | 7 -- src/kernels/level2/level2.opencl | 57 ++++++++-- src/kernels/level2/xher2.opencl | 104 ++++++++++++++++++ src/routines/level2/xher2.cc | 114 ++++++++++++++++++++ src/routines/level2/xhpr.cc | 2 +- src/routines/level2/xspr.cc | 2 +- src/routines/level2/xsyr.cc | 2 +- src/routines/level2/xsyr2.cc | 52 +++++++++ test/routines/level2/xher2.h | 128 +++++++++++++++++++++++ test/routines/level2/xsyr2.h | 128 +++++++++++++++++++++++ 15 files changed, 728 insertions(+), 36 deletions(-) create mode 100644 include/internal/routines/level2/xher2.h create mode 100644 include/internal/routines/level2/xsyr2.h create mode 100644 src/kernels/level2/xher2.opencl create mode 100644 src/routines/level2/xher2.cc create mode 100644 src/routines/level2/xsyr2.cc create mode 100644 test/routines/level2/xher2.h create mode 100644 test/routines/level2/xsyr2.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e35fda7a..33458989 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,7 +112,7 @@ set(SAMPLE_PROGRAMS_CPP sgemm) set(SAMPLE_PROGRAMS_C sgemm) set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc) set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv - xger xgeru xgerc xher xhpr xsyr xspr) + xger xgeru xgerc xher xhpr xher2 xsyr xspr xsyr2) set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm) set(ROUTINES ${LEVEL1_ROUTINES} 
${LEVEL2_ROUTINES} ${LEVEL3_ROUTINES}) set(PRECISIONS 32 64 3232 6464) diff --git a/include/internal/routines/level2/xher2.h b/include/internal/routines/level2/xher2.h new file mode 100644 index 00000000..26f69046 --- /dev/null +++ b/include/internal/routines/level2/xher2.h @@ -0,0 +1,60 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xher2 routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHER2_H_ +#define CLBLAST_ROUTINES_XHER2_H_ + +#include "internal/routine.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class Xher2: public Routine { + public: + + // Members and methods from the base class + using Routine::db_; + using Routine::source_string_; + using Routine::queue_; + using Routine::GetProgramFromCache; + using Routine::TestVectorX; + using Routine::TestVectorY; + using Routine::TestMatrixA; + using Routine::TestMatrixAP; + using Routine::RunKernel; + using Routine::ErrorIn; + + // Constructor + Xher2(Queue &queue, Event &event, const std::string &name = "HER2"); + + // Templated-precision implementation of the routine + StatusCode DoHer2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t 
a_offset, const size_t a_ld, + const bool packed = false); + + private: + // Static variable to get the precision + const static Precision precision_; +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHER2_H_ +#endif diff --git a/include/internal/routines/level2/xsyr2.h b/include/internal/routines/level2/xsyr2.h new file mode 100644 index 00000000..f4dc9375 --- /dev/null +++ b/include/internal/routines/level2/xsyr2.h @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xsyr2 routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSYR2_H_ +#define CLBLAST_ROUTINES_XSYR2_H_ + +#include "internal/routines/level2/xher2.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class Xsyr2: public Xher2 { + public: + + // Uses the regular Xher2 routine + using Xher2::DoHer2; + + // Constructor + Xsyr2(Queue &queue, Event &event, const std::string &name = "SYR2"); + + // Templated-precision implementation of the routine + StatusCode DoSyr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSYR2_H_ +#endif diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 0f5fbfa7..2c01efb5 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -83,11 +83,11 @@ routines = [ Routine(True, "2b", "gerc", T, [C,Z], ["m","n"], ["layout"], ["x","y"], ["a"], ["alpha"], False, "General rank-1 complex conjugated matrix update"), Routine(True, "2b", "her", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Hermitian rank-1 matrix update"), Routine(True, "2b", "hpr", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["ap"], ["alpha"], False, "Hermitian packed rank-1 matrix update"), - Routine(False, "2b", "her2", T, [C,Z], ["n"], ["layout","triangle"], ["x","y"], ["a"], ["alpha"], False, "Hermitian rank-2 matrix update"), + Routine(True, "2b", "her2", T, [C,Z], ["n"], 
["layout","triangle"], ["x","y"], ["a"], ["alpha"], False, "Hermitian rank-2 matrix update"), Routine(False, "2b", "hpr2", T, [C,Z], ["n"], ["layout","triangle"], ["x","y"], ["ap"], ["alpha"], False, "Hermitian packed rank-2 matrix update"), Routine(True, "2b", "syr", T, [S,D], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Symmetric rank-1 matrix update"), Routine(True, "2b", "spr", T, [S,D], ["n"], ["layout","triangle"], ["x"], ["ap"], ["alpha"], False, "Symmetric packed rank-1 matrix update"), - Routine(False, "2b", "syr2", T, [S,D], ["n"], ["layout","triangle"], ["x","y"], ["a"], ["alpha"], False, "Symmetric rank-2 matrix update"), + Routine(True, "2b", "syr2", T, [S,D], ["n"], ["layout","triangle"], ["x","y"], ["a"], ["alpha"], False, "Symmetric rank-2 matrix update"), Routine(False, "2b", "spr2", T, [S,D], ["n"], ["layout","triangle"], ["x","y"], ["ap"], ["alpha"], False, "Symmetric packed rank-2 matrix update"), ], [ # Level 3: matrix-matrix @@ -247,7 +247,7 @@ files = [ path_clblast+"/src/clblast_c.cc", path_clblast+"/test/wrapper_clblas.h", ] -header_lines = [84, 59, 80, 24, 22] +header_lines = [84, 61, 80, 24, 22] footer_lines = [6, 3, 5, 2, 6] # Checks whether the command-line arguments are valid; exists otherwise diff --git a/src/clblast.cc b/src/clblast.cc index 466de83e..6b8ac409 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -43,8 +43,10 @@ #include "internal/routines/level2/xgerc.h" #include "internal/routines/level2/xher.h" #include "internal/routines/level2/xhpr.h" +#include "internal/routines/level2/xher2.h" #include "internal/routines/level2/xsyr.h" #include "internal/routines/level2/xspr.h" +#include "internal/routines/level2/xsyr2.h" // BLAS level-3 includes #include "internal/routines/level3/xgemm.h" @@ -1014,14 +1016,24 @@ template StatusCode Hpr(const Layout, const Triangle, // Hermitian rank-2 matrix update: CHER2/ZHER2 template -StatusCode Her2(const Layout, const Triangle, - const size_t, - const T, - const cl_mem, 
const size_t, const size_t, - const cl_mem, const size_t, const size_t, - cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*) { - return StatusCode::kNotImplemented; +StatusCode Her2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = Queue(*queue); + auto event_cpp = Event(*event); + auto routine = Xher2(queue_cpp, event_cpp); + auto status = routine.SetUp(); + if (status != StatusCode::kSuccess) { return status; } + return routine.DoHer2(layout, triangle, + n, + alpha, + Buffer(x_buffer), x_offset, x_inc, + Buffer(y_buffer), y_offset, y_inc, + Buffer(a_buffer), a_offset, a_ld); } template StatusCode Her2(const Layout, const Triangle, const size_t, @@ -1130,14 +1142,24 @@ template StatusCode Spr(const Layout, const Triangle, // Symmetric rank-2 matrix update: SSYR2/DSYR2 template -StatusCode Syr2(const Layout, const Triangle, - const size_t, - const T, - const cl_mem, const size_t, const size_t, - const cl_mem, const size_t, const size_t, - cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*) { - return StatusCode::kNotImplemented; +StatusCode Syr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = Queue(*queue); + auto event_cpp = Event(*event); + auto routine = Xsyr2(queue_cpp, event_cpp); + auto status = routine.SetUp(); + if (status != StatusCode::kSuccess) { return status; } + return routine.DoSyr2(layout, triangle, + n, + alpha, + Buffer(x_buffer), 
x_offset, x_inc, + Buffer(y_buffer), y_offset, y_inc, + Buffer(a_buffer), a_offset, a_ld); } template StatusCode Syr2(const Layout, const Triangle, const size_t, diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl index 973c123e..f2a2e7a7 100644 --- a/src/kernels/common.opencl +++ b/src/kernels/common.opencl @@ -147,13 +147,6 @@ R"( #define AXPBY(e, a, b, c, d) e = a*b + c*d #endif -// The scalar GER function -#if PRECISION == 3232 || PRECISION == 6464 - #define GER(e, a, b, c, d) real ab; ab.x = MulReal(a,b); ab.y = MulImag(a,b); e.x = MulReal(ab,c) + d.x; e.y = MulImag(ab,c) + d.y -#else - #define GER(e, a, b, c, d) e = a*b*c + d -#endif - // The complex conjugate operation for complex transforms #if PRECISION == 3232 || PRECISION == 6464 #define COMPLEX_CONJUGATE(value) value.x = value.x; value.y = -value.y diff --git a/src/kernels/level2/level2.opencl b/src/kernels/level2/level2.opencl index ad92595a..1b0efeab 100644 --- a/src/kernels/level2/level2.opencl +++ b/src/kernels/level2/level2.opencl @@ -39,10 +39,7 @@ inline real LoadVector(const int id, const int max, if (id < max) { real result = gm[id*inc + offset]; if (do_conjugate) { - #if defined(ROUTINE_GERC) - COMPLEX_CONJUGATE(result); - #endif - #if defined(ROUTINE_HER) || defined(ROUTINE_HPR) + #if defined(ROUTINE_GERC) || defined(ROUTINE_HER) || defined(ROUTINE_HPR) || defined(ROUTINE_HER2) || defined(ROUTINE_HPR2) COMPLEX_CONJUGATE(result); #endif } @@ -81,8 +78,16 @@ inline void MatrixUpdate(const int id1, const int id2, const int max1, const int const real avalue = agm[a_index]; // Computes result = alpha * x[i] * y[j] + a[i][j] - real result; - GER(result, alpha, xvalue, yvalue, avalue); + #if PRECISION == 3232 || PRECISION == 6464 + real ax; + ax.x = MulReal(alpha, xvalue); + ax.y = MulImag(alpha, xvalue); + real result; + result.x = MulReal(ax, yvalue) + avalue.x; + result.y = MulImag(ax, yvalue) + avalue.y; + #else + real result = alpha * xvalue * yvalue + avalue; + #endif // For 
hermetian matrices #if defined(ROUTINE_HER) || defined(ROUTINE_HPR) @@ -94,6 +99,46 @@ inline void MatrixUpdate(const int id1, const int id2, const int max1, const int } } +// Performs the rank-2 matrix update +inline void MatrixUpdate2(const int id1, const int id2, const int max1, const int max2, + __global real* agm, const int a_offset, const int a_ld, + const real alpha1, const real xvalue, const real yvalue, + const real alpha2, const real xtvalue, const real ytvalue, + const int is_upper) { + + // Bounds of a regular matrix + if (id1 < max1 && id2 < max2) { + + const int a_index = id2*a_ld + id1 + a_offset; + + // Loads the current value of the A matrix + const real avalue = agm[a_index]; + + // Computes result = alpha * x[i] * y[j] + alpha * x[j] * y[i] + a[i][j] + #if PRECISION == 3232 || PRECISION == 6464 + real ax; + ax.x = MulReal(alpha2, xvalue); + ax.y = MulImag(alpha2, xvalue); + real atx; + atx.x = MulReal(alpha1, xtvalue); + atx.y = MulImag(alpha1, xtvalue); + real result; + result.x = MulReal(ax, yvalue) + MulReal(atx, ytvalue) + avalue.x; + result.y = MulImag(ax, yvalue) + MulImag(atx, ytvalue) + avalue.y; + #else + real result = alpha1 * xvalue * yvalue + alpha2 * xtvalue * ytvalue + avalue; + #endif + + // For hermetian matrices + #if defined(ROUTINE_HER2) || defined(ROUTINE_HPR2) + if (id1 == id2) { result.y = ZERO; } + #endif + + // Stores the final result + agm[a_index] = result; + } +} + // ================================================================================================= // End of the C++11 raw string literal diff --git a/src/kernels/level2/xher2.opencl b/src/kernels/level2/xher2.opencl new file mode 100644 index 00000000..4a2edce8 --- /dev/null +++ b/src/kernels/level2/xher2.opencl @@ -0,0 +1,104 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file contains the Xher2 kernels for rank-2 matrix update. +// +// ================================================================================================= + +// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string +// literal). Comment-out this line for syntax-highlighting when developing. +R"( + +// ================================================================================================= + +// Symmetric version of the rank-2 matrix update kernel (HER2, HPR2, SYR2, SPR2) +__attribute__((reqd_work_group_size(WGS1, WGS2, 1))) +__kernel void Xher2(const int n, const real alpha, + const __global real* restrict xgm, const int x_offset, const int x_inc, + const __global real* restrict ygm, const int y_offset, const int y_inc, + __global real* restrict agm, const int a_offset, const int a_ld, + const int is_upper, const int is_rowmajor) { + + // Register storage for X and Y + real xvalues[WPT]; + real yvalues[WPT]; + real xtvalues[WPT]; + real ytvalues[WPT]; + + // Loads the X-vector + #pragma unroll + for (int w=0; w id2)) || (!is_upper && (id2 > id1))) { + // Do nothing + } + + // Loads A, performs the operation, and stores the result into A + else { + MatrixUpdate2(id1, id2, n, n, agm, a_offset, a_ld, + alpha1, xvalues[w2], yvalues[w1], + alpha2, xtvalues[w1], ytvalues[w2], is_upper); + } + } + } +} + +// ================================================================================================= + +// End of the C++11 raw string literal +)" + +// ================================================================================================= diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cc new file mode 100644 index 00000000..63144f77 --- /dev/null +++ b/src/routines/level2/xher2.cc @@ -0,0 
+1,114 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xher2 class (see the header for information about the class). +// +// ================================================================================================= + +#include "internal/routines/level2/xher2.h" + +#include + +namespace clblast { +// ================================================================================================= + +// Specific implementations to get the memory-type based on a template argument +template <> const Precision Xher2::precision_ = Precision::kSingle; +template <> const Precision Xher2::precision_ = Precision::kDouble; +template <> const Precision Xher2::precision_ = Precision::kComplexSingle; +template <> const Precision Xher2::precision_ = Precision::kComplexDouble; + +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xher2::Xher2(Queue &queue, Event &event, const std::string &name): + Routine(queue, event, name, {"Xger"}, precision_) { + source_string_ = + #include "../../kernels/level2/level2.opencl" + #include "../../kernels/level2/xher2.opencl" + ; +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xher2::DoHer2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const 
size_t a_ld, + const bool packed) { + + // Makes sure the dimensions are larger than zero + if (n == 0) { return StatusCode::kInvalidDimension; } + + // The data is either in the upper or lower triangle + const auto is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + const auto is_rowmajor = (layout == Layout::kRowMajor); + + // Tests the matrix and the vectors for validity + auto status = StatusCode::kSuccess; + if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); } + else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld, sizeof(T)); } + if (ErrorIn(status)) { return status; } + status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + if (ErrorIn(status)) { return status; } + status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T)); + if (ErrorIn(status)) { return status; } + + // Retrieves the Xgemv kernel from the compiled binary + try { + auto& program = GetProgramFromCache(); + auto kernel = Kernel(program, "Xher2"); + + // Sets the kernel arguments + kernel.SetArgument(0, static_cast(n)); + kernel.SetArgument(1, alpha); + kernel.SetArgument(2, x_buffer()); + kernel.SetArgument(3, static_cast(x_offset)); + kernel.SetArgument(4, static_cast(x_inc)); + kernel.SetArgument(5, y_buffer()); + kernel.SetArgument(6, static_cast(y_offset)); + kernel.SetArgument(7, static_cast(y_inc)); + kernel.SetArgument(8, a_buffer()); + kernel.SetArgument(9, static_cast(a_offset)); + kernel.SetArgument(10, static_cast(a_ld)); + kernel.SetArgument(11, static_cast(is_upper)); + kernel.SetArgument(12, static_cast(is_rowmajor)); + + // Launches the kernel + auto global_one = CeilDiv(Ceil(n, db_["WGS1"]), db_["WPT"]); + auto global_two = CeilDiv(Ceil(n, db_["WGS2"]), db_["WPT"]); + auto global = std::vector{global_one, global_two}; + auto local = std::vector{db_["WGS1"], db_["WGS2"]}; + status = RunKernel(kernel, global, local); + if (ErrorIn(status)) { return 
status; } + + // Waits for all kernels to finish + queue_.Finish(); + + // Succesfully finished the computation + return StatusCode::kSuccess; + } catch (...) { return StatusCode::kInvalidKernel; } +} + +// ================================================================================================= + +// Compiles the templated class +template class Xher2; +template class Xher2; +template class Xher2; +template class Xher2; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xhpr.cc b/src/routines/level2/xhpr.cc index abe00669..24d7ae95 100644 --- a/src/routines/level2/xhpr.cc +++ b/src/routines/level2/xhpr.cc @@ -34,7 +34,7 @@ StatusCode Xhpr::DoHpr(const Layout layout, const Triangle triangle, const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, const Buffer &ap_buffer, const size_t ap_offset) { - // + // Specific Xhpr functionality is implemented in the kernel using defines return DoHer(layout, triangle, n, alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, n, true); diff --git a/src/routines/level2/xspr.cc b/src/routines/level2/xspr.cc index 5159ad50..7ef41fba 100644 --- a/src/routines/level2/xspr.cc +++ b/src/routines/level2/xspr.cc @@ -34,7 +34,7 @@ StatusCode Xspr::DoSpr(const Layout layout, const Triangle triangle, const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, const Buffer &ap_buffer, const size_t ap_offset) { - // + // Specific Xspr functionality is implemented in the kernel using defines return DoHer(layout, triangle, n, alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, n, true); diff --git a/src/routines/level2/xsyr.cc b/src/routines/level2/xsyr.cc index 755fde0d..c01fa2d3 100644 --- a/src/routines/level2/xsyr.cc +++ b/src/routines/level2/xsyr.cc @@ -34,7 +34,7 @@ StatusCode Xsyr::DoSyr(const Layout layout, const Triangle triangle, const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, 
const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { - // + // Specific Xsyr functionality is implemented in the kernel using defines return DoHer(layout, triangle, n, alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld); diff --git a/src/routines/level2/xsyr2.cc b/src/routines/level2/xsyr2.cc new file mode 100644 index 00000000..6db55085 --- /dev/null +++ b/src/routines/level2/xsyr2.cc @@ -0,0 +1,52 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xsyr2 class (see the header for information about the class). +// +// ================================================================================================= + +#include "internal/routines/level2/xsyr2.h" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xsyr2::Xsyr2(Queue &queue, Event &event, const std::string &name): + Xher2(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xsyr2::DoSyr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { + + // Specific Xsyr2 functionality is implemented in the kernel using defines + return DoHer2(layout, triangle, n, alpha, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + 
a_buffer, a_offset, a_ld); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xsyr2; +template class Xsyr2; + +// ================================================================================================= +} // namespace clblast diff --git a/test/routines/level2/xher2.h b/test/routines/level2/xher2.h new file mode 100644 index 00000000..c12ff827 --- /dev/null +++ b/test/routines/level2/xher2.h @@ -0,0 +1,128 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements a class with static methods to describe the Xher2 routine. Examples of +// such 'descriptions' are how to calculate the size a of buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. 
+// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XHER2_H_ +#define CLBLAST_TEST_ROUTINES_XHER2_H_ + +#include +#include + +#include "wrapper_clblas.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class TestXher2 { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 2; } + + // The list of arguments relevant for this routine + static std::vector GetOptions() { + return {kArgN, + kArgLayout, kArgTriangle, + kArgALeadDim, kArgXInc, kArgYInc, + kArgAOffset, kArgXOffset, kArgYOffset, + kArgAlpha}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments &args) { + return args.n * args.x_inc + args.x_offset; + } + static size_t GetSizeY(const Arguments &args) { + return args.n * args.y_inc + args.y_offset; + } + static size_t GetSizeA(const Arguments &args) { + return args.n * args.a_ld + args.a_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments &args) { + args.a_size = GetSizeA(args); + args.x_size = GetSizeX(args); + args.y_size = GetSizeY(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments &args) { return args.n; } + static size_t DefaultLDB(const Arguments &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector; + static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + 
static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Her2(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + clWaitForEvents(1, &event); + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXher2(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + + // Describes how to download the results of the computation (more importantly: which buffer) + static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector result(args.a_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments &args) { return args.n; } + static size_t ResultID2(const Arguments &args) { return args.n; } + static size_t GetResultIndex(const Arguments &args, const size_t id1, const size_t id2) { + return id2*args.a_ld + id1 + args.a_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments &args) { + return 5 * args.n * args.n; + } + static size_t GetBytes(const Arguments &args) { + return (args.n*args.n + 2 * args.n) * sizeof(T); + } +}; + +// 
================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XHER2_H_ +#endif diff --git a/test/routines/level2/xsyr2.h b/test/routines/level2/xsyr2.h new file mode 100644 index 00000000..32497a61 --- /dev/null +++ b/test/routines/level2/xsyr2.h @@ -0,0 +1,128 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements a class with static methods to describe the Xsyr2 routine. Examples of +// such 'descriptions' are how to calculate the size a of buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. 
+// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XSYR2_H_ +#define CLBLAST_TEST_ROUTINES_XSYR2_H_ + +#include +#include + +#include "wrapper_clblas.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class TestXsyr2 { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 2; } + + // The list of arguments relevant for this routine + static std::vector GetOptions() { + return {kArgN, + kArgLayout, kArgTriangle, + kArgALeadDim, kArgXInc, kArgYInc, + kArgAOffset, kArgXOffset, kArgYOffset, + kArgAlpha}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments &args) { + return args.n * args.x_inc + args.x_offset; + } + static size_t GetSizeY(const Arguments &args) { + return args.n * args.y_inc + args.y_offset; + } + static size_t GetSizeA(const Arguments &args) { + return args.n * args.a_ld + args.a_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments &args) { + args.a_size = GetSizeA(args); + args.x_size = GetSizeX(args); + args.y_size = GetSizeY(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments &args) { return args.n; } + static size_t DefaultLDB(const Arguments &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector; + static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + 
static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Syr2(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + &queue_plain, &event); + clWaitForEvents(1, &event); + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXsyr2(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + + // Describes how to download the results of the computation (more importantly: which buffer) + static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector result(args.a_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments &args) { return args.n; } + static size_t ResultID2(const Arguments &args) { return args.n; } + static size_t GetResultIndex(const Arguments &args, const size_t id1, const size_t id2) { + return id2*args.a_ld + id1 + args.a_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments &args) { + return 5 * args.n * args.n; + } + static size_t GetBytes(const Arguments &args) { + return (args.n*args.n + 2 * args.n) * sizeof(T); + } +}; + +// 
================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XSYR2_H_ +#endif From 306bf67660da4f1adacaedf9066925240abf4ea9 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 6 Mar 2016 15:48:11 +0100 Subject: [PATCH 39/50] Added preliminary support for xHPR2 and xSPR2 routines --- CHANGELOG | 4 + CMakeLists.txt | 2 +- README.md | 12 +-- include/internal/routines/level2/xhpr2.h | 46 ++++++++ include/internal/routines/level2/xspr2.h | 46 ++++++++ scripts/generator/generator.py | 6 +- src/clblast.cc | 54 +++++++--- src/kernels/level2/level2.opencl | 13 ++- src/routines/level2/xhpr.cc | 3 +- src/routines/level2/xhpr2.cc | 53 ++++++++++ src/routines/level2/xspr.cc | 3 +- src/routines/level2/xspr2.cc | 53 ++++++++++ test/routines/level2/xhpr2.h | 128 +++++++++++++++++++++++ test/routines/level2/xspr2.h | 128 +++++++++++++++++++++++ 14 files changed, 520 insertions(+), 31 deletions(-) create mode 100644 include/internal/routines/level2/xhpr2.h create mode 100644 include/internal/routines/level2/xspr2.h create mode 100644 src/routines/level2/xhpr2.cc create mode 100644 src/routines/level2/xspr2.cc create mode 100644 test/routines/level2/xhpr2.h create mode 100644 test/routines/level2/xspr2.h diff --git a/CHANGELOG b/CHANGELOG index 72f53550..3c91c31b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -9,8 +9,12 @@ Development version (next release) * CGERC/ZGERC * CHER/ZHER * CHPR/ZHPR + * CHER2/ZHER2 + * CHPR2/ZHPR2 * CSYR/ZSYR * CSPR/ZSPR + * CSYR2/ZSYR2 + * CSPR2/ZSPR2 Version 0.5.0 - Improved structure and performance of level-2 routines (xSYMV/xHEMV) diff --git a/CMakeLists.txt b/CMakeLists.txt index 33458989..508dad72 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,7 +112,7 @@ set(SAMPLE_PROGRAMS_CPP sgemm) set(SAMPLE_PROGRAMS_C sgemm) set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc) set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv 
xtbmv xtpmv - xger xgeru xgerc xher xhpr xher2 xsyr xspr xsyr2) + xger xgeru xgerc xher xhpr xher2 xhpr2 xsyr xspr xsyr2 xspr2) set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm) set(ROUTINES ${LEVEL1_ROUTINES} ${LEVEL2_ROUTINES} ${LEVEL3_ROUTINES}) set(PRECISIONS 32 64 3232 6464) diff --git a/README.md b/README.md index 3da0e262..87c7bb53 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,6 @@ CLBlast is in active development but already supports almost all the BLAS routin | xASUM | | | - | - | +SC +DZ | | IxAMAX | | | | | | - | Level-2 | S | D | C | Z | Notes | | ---------|---|---|---|---|---------| | xGEMV | ✔ | ✔ | ✔ | ✔ | | @@ -198,12 +197,12 @@ CLBlast is in active development but already supports almost all the BLAS routin | xGERC | - | - | ✔ | ✔ | | | xHER | - | - | ✔ | ✔ | | | xHPR | - | - | ✔ | ✔ | | -| xHER2 | - | - | | | | -| xHPR2 | - | - | | | | +| xHER2 | - | - | ✔ | ✔ | | +| xHPR2 | - | - | ✔ | ✔ | | | xSYR | ✔ | ✔ | - | - | | | xSPR | ✔ | ✔ | - | - | | -| xSYR2 | | | - | - | | -| xSPR2 | | | - | - | | +| xSYR2 | ✔ | ✔ | - | - | | +| xSPR2 | ✔ | ✔ | - | - | | | Level-3 | S | D | C | Z | Notes | | ---------|---|---|---|---|---------| @@ -246,11 +245,8 @@ To-do list before release of version 1.0 - Improve device performance: * Tune for a wider range of devices * Allow users to define custom tuned parameters -- Improve the tuning - * Make the tuners upload their data to a central server - Improve the performance comparisons: * Enable comparison against optionally: ViennaCL, cuBLAS, MAGMA OpenCL - Further reduce the likelihood of crashes: * Add checks for proper command-line arguments in the tuner, tester and client - * Add checks for valid database parameters * Test in multi-threaded environments diff --git a/include/internal/routines/level2/xhpr2.h b/include/internal/routines/level2/xhpr2.h new file mode 100644 index 00000000..fd243d33 --- /dev/null +++ b/include/internal/routines/level2/xhpr2.h @@ -0,0 +1,46 @@ + +// 
================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xhpr2 routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHPR2_H_ +#define CLBLAST_ROUTINES_XHPR2_H_ + +#include "internal/routines/level2/xher2.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class Xhpr2: public Xher2 { + public: + + // Uses the regular Xher2 routine + using Xher2::DoHer2; + + // Constructor + Xhpr2(Queue &queue, Event &event, const std::string &name = "HPR2"); + + // Templated-precision implementation of the routine + StatusCode DoHpr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &ap_buffer, const size_t ap_offset); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHPR2_H_ +#endif diff --git a/include/internal/routines/level2/xspr2.h b/include/internal/routines/level2/xspr2.h new file mode 100644 index 00000000..3d5f4992 --- /dev/null +++ b/include/internal/routines/level2/xspr2.h @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. 
The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xspr2 routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSPR2_H_ +#define CLBLAST_ROUTINES_XSPR2_H_ + +#include "internal/routines/level2/xher2.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class Xspr2: public Xher2 { + public: + + // Uses the regular Xher2 routine + using Xher2::DoHer2; + + // Constructor + Xspr2(Queue &queue, Event &event, const std::string &name = "SPR2"); + + // Templated-precision implementation of the routine + StatusCode DoSpr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &ap_buffer, const size_t ap_offset); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSPR2_H_ +#endif diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 2c01efb5..99f326cd 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -84,11 +84,11 @@ routines = [ Routine(True, "2b", "her", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Hermitian rank-1 matrix update"), Routine(True, "2b", "hpr", Tc, [Css,Zdd], ["n"], ["layout","triangle"], ["x"], ["ap"], ["alpha"], False, "Hermitian packed rank-1 matrix update"), Routine(True, "2b", "her2", 
T, [C,Z], ["n"], ["layout","triangle"], ["x","y"], ["a"], ["alpha"], False, "Hermitian rank-2 matrix update"), - Routine(False, "2b", "hpr2", T, [C,Z], ["n"], ["layout","triangle"], ["x","y"], ["ap"], ["alpha"], False, "Hermitian packed rank-2 matrix update"), + Routine(True, "2b", "hpr2", T, [C,Z], ["n"], ["layout","triangle"], ["x","y"], ["ap"], ["alpha"], False, "Hermitian packed rank-2 matrix update"), Routine(True, "2b", "syr", T, [S,D], ["n"], ["layout","triangle"], ["x"], ["a"], ["alpha"], False, "Symmetric rank-1 matrix update"), Routine(True, "2b", "spr", T, [S,D], ["n"], ["layout","triangle"], ["x"], ["ap"], ["alpha"], False, "Symmetric packed rank-1 matrix update"), Routine(True, "2b", "syr2", T, [S,D], ["n"], ["layout","triangle"], ["x","y"], ["a"], ["alpha"], False, "Symmetric rank-2 matrix update"), - Routine(False, "2b", "spr2", T, [S,D], ["n"], ["layout","triangle"], ["x","y"], ["ap"], ["alpha"], False, "Symmetric packed rank-2 matrix update"), + Routine(True, "2b", "spr2", T, [S,D], ["n"], ["layout","triangle"], ["x","y"], ["ap"], ["alpha"], False, "Symmetric packed rank-2 matrix update"), ], [ # Level 3: matrix-matrix Routine(True, "3", "gemm", T, [S,D,C,Z], ["m","n","k"], ["layout","a_transpose","b_transpose"], ["a","b"], ["c"], ["alpha","beta"], False, "General matrix-matrix multiplication"), @@ -247,7 +247,7 @@ files = [ path_clblast+"/src/clblast_c.cc", path_clblast+"/test/wrapper_clblas.h", ] -header_lines = [84, 61, 80, 24, 22] +header_lines = [84, 63, 80, 24, 22] footer_lines = [6, 3, 5, 2, 6] # Checks whether the command-line arguments are valid; exists otherwise diff --git a/src/clblast.cc b/src/clblast.cc index 6b8ac409..3695aa02 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -44,9 +44,11 @@ #include "internal/routines/level2/xher.h" #include "internal/routines/level2/xhpr.h" #include "internal/routines/level2/xher2.h" +#include "internal/routines/level2/xhpr2.h" #include "internal/routines/level2/xsyr.h" #include 
"internal/routines/level2/xspr.h" #include "internal/routines/level2/xsyr2.h" +#include "internal/routines/level2/xspr2.h" // BLAS level-3 includes #include "internal/routines/level3/xgemm.h" @@ -1052,14 +1054,24 @@ template StatusCode Her2(const Layout, const Triangle, // Hermitian packed rank-2 matrix update: CHPR2/ZHPR2 template -StatusCode Hpr2(const Layout, const Triangle, - const size_t, - const T, - const cl_mem, const size_t, const size_t, - const cl_mem, const size_t, const size_t, - cl_mem, const size_t, - cl_command_queue*, cl_event*) { - return StatusCode::kNotImplemented; +StatusCode Hpr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = Queue(*queue); + auto event_cpp = Event(*event); + auto routine = Xhpr2(queue_cpp, event_cpp); + auto status = routine.SetUp(); + if (status != StatusCode::kSuccess) { return status; } + return routine.DoHpr2(layout, triangle, + n, + alpha, + Buffer(x_buffer), x_offset, x_inc, + Buffer(y_buffer), y_offset, y_inc, + Buffer(ap_buffer), ap_offset); } template StatusCode Hpr2(const Layout, const Triangle, const size_t, @@ -1178,14 +1190,24 @@ template StatusCode Syr2(const Layout, const Triangle, // Symmetric packed rank-2 matrix update: SSPR2/DSPR2 template -StatusCode Spr2(const Layout, const Triangle, - const size_t, - const T, - const cl_mem, const size_t, const size_t, - const cl_mem, const size_t, const size_t, - cl_mem, const size_t, - cl_command_queue*, cl_event*) { - return StatusCode::kNotImplemented; +StatusCode Spr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem 
ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = Queue(*queue); + auto event_cpp = Event(*event); + auto routine = Xspr2(queue_cpp, event_cpp); + auto status = routine.SetUp(); + if (status != StatusCode::kSuccess) { return status; } + return routine.DoSpr2(layout, triangle, + n, + alpha, + Buffer(x_buffer), x_offset, x_inc, + Buffer(y_buffer), y_offset, y_inc, + Buffer(ap_buffer), ap_offset); } template StatusCode Spr2(const Layout, const Triangle, const size_t, diff --git a/src/kernels/level2/level2.opencl b/src/kernels/level2/level2.opencl index 1b0efeab..be979766 100644 --- a/src/kernels/level2/level2.opencl +++ b/src/kernels/level2/level2.opencl @@ -109,7 +109,18 @@ inline void MatrixUpdate2(const int id1, const int id2, const int max1, const in // Bounds of a regular matrix if (id1 < max1 && id2 < max2) { - const int a_index = id2*a_ld + id1 + a_offset; + #if defined(ROUTINE_SPR2) || defined(ROUTINE_HPR2) + int a_index; + if (is_upper) { + a_index = (id1 <= id2) ? ((id2+1)*id2)/2 + id1 : ((id1+1)*id1)/2 + id2; + } + else { + a_index = (id1 >= id2) ? 
((2*a_ld-(id2+1))*id2)/2 + id1 : ((2*a_ld-(id1+1))*id1)/2 + id2; + } + a_index += a_offset; + #else + const int a_index = id2*a_ld + id1 + a_offset; + #endif // Loads the current value of the A matrix const real avalue = agm[a_index]; diff --git a/src/routines/level2/xhpr.cc b/src/routines/level2/xhpr.cc index 24d7ae95..b0cea72f 100644 --- a/src/routines/level2/xhpr.cc +++ b/src/routines/level2/xhpr.cc @@ -37,7 +37,8 @@ StatusCode Xhpr::DoHpr(const Layout layout, const Triangle triangle, // Specific Xhpr functionality is implemented in the kernel using defines return DoHer(layout, triangle, n, alpha, x_buffer, x_offset, x_inc, - ap_buffer, ap_offset, n, true); + ap_buffer, ap_offset, n, + true); // packed matrix } // ================================================================================================= diff --git a/src/routines/level2/xhpr2.cc b/src/routines/level2/xhpr2.cc new file mode 100644 index 00000000..ded35e53 --- /dev/null +++ b/src/routines/level2/xhpr2.cc @@ -0,0 +1,53 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xhpr2 class (see the header for information about the class). 
+// +// ================================================================================================= + +#include "internal/routines/level2/xhpr2.h" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xhpr2::Xhpr2(Queue &queue, Event &event, const std::string &name): + Xher2(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xhpr2::DoHpr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &ap_buffer, const size_t ap_offset) { + + // Specific Xhpr2 functionality is implemented in the kernel using defines + return DoHer2(layout, triangle, n, alpha, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + ap_buffer, ap_offset, n, + true); // packed matrix +} + +// ================================================================================================= + +// Compiles the templated class +template class Xhpr2; +template class Xhpr2; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xspr.cc b/src/routines/level2/xspr.cc index 7ef41fba..2d998e0b 100644 --- a/src/routines/level2/xspr.cc +++ b/src/routines/level2/xspr.cc @@ -37,7 +37,8 @@ StatusCode Xspr::DoSpr(const Layout layout, const Triangle triangle, // Specific Xspr functionality is implemented in the kernel using defines return DoHer(layout, triangle, n, alpha, x_buffer, x_offset, x_inc, - ap_buffer, ap_offset, n, true); + ap_buffer, ap_offset, n, + true); // packed matrix } // 
================================================================================================= diff --git a/src/routines/level2/xspr2.cc b/src/routines/level2/xspr2.cc new file mode 100644 index 00000000..fd5232da --- /dev/null +++ b/src/routines/level2/xspr2.cc @@ -0,0 +1,53 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xspr2 class (see the header for information about the class). +// +// ================================================================================================= + +#include "internal/routines/level2/xspr2.h" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xspr2::Xspr2(Queue &queue, Event &event, const std::string &name): + Xher2(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xspr2::DoSpr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &ap_buffer, const size_t ap_offset) { + + // Specific Xspr2 functionality is implemented in the kernel using defines + return DoHer2(layout, triangle, n, alpha, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + ap_buffer, ap_offset, n, + true); // packed matrix +} + +// ================================================================================================= + +// 
Compiles the templated class +template class Xspr2; +template class Xspr2; + +// ================================================================================================= +} // namespace clblast diff --git a/test/routines/level2/xhpr2.h b/test/routines/level2/xhpr2.h new file mode 100644 index 00000000..68fbc76d --- /dev/null +++ b/test/routines/level2/xhpr2.h @@ -0,0 +1,128 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements a class with static methods to describe the Xhpr2 routine. Examples of +// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. 
+// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XHPR2_H_ +#define CLBLAST_TEST_ROUTINES_XHPR2_H_ + +#include +#include + +#include "wrapper_clblas.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class TestXhpr2 { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 2; } + + // The list of arguments relevant for this routine + static std::vector GetOptions() { + return {kArgN, + kArgLayout, kArgTriangle, + kArgXInc, kArgYInc, + kArgAPOffset, kArgXOffset, kArgYOffset, + kArgAlpha}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments &args) { + return args.n * args.x_inc + args.x_offset; + } + static size_t GetSizeY(const Arguments &args) { + return args.n * args.y_inc + args.y_offset; + } + static size_t GetSizeAP(const Arguments &args) { + return ((args.n*(args.n+1)) / 2) + args.ap_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments &args) { + args.ap_size = GetSizeAP(args); + args.x_size = GetSizeX(args); + args.y_size = GetSizeY(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments &args) { return args.n; } + static size_t DefaultLDB(const Arguments &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector; + static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + 
static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Hpr2(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.ap_mat(), args.ap_offset, + &queue_plain, &event); + clWaitForEvents(1, &event); + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXhpr2(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.ap_mat(), args.ap_offset, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + + // Describes how to download the results of the computation (more importantly: which buffer) + static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector result(args.ap_size, static_cast(0)); + buffers.ap_mat.Read(queue, args.ap_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments &args) { return args.ap_size - args.ap_offset; } + static size_t ResultID2(const Arguments &) { return 1; } // N/A for this routine + static size_t GetResultIndex(const Arguments &args, const size_t id1, const size_t) { + return id1 + args.ap_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments &args) { + return 5 * ((args.n*(args.n+1)) / 2); + } + static size_t GetBytes(const Arguments &args) { + return ((args.n*(args.n+1)) + 2 * args.n) * sizeof(T); + } +}; + +// 
================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XHPR2_H_ +#endif diff --git a/test/routines/level2/xspr2.h b/test/routines/level2/xspr2.h new file mode 100644 index 00000000..43d66c9e --- /dev/null +++ b/test/routines/level2/xspr2.h @@ -0,0 +1,128 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements a class with static methods to describe the Xspr2 routine. Examples of +// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. 
+// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XSPR2_H_ +#define CLBLAST_TEST_ROUTINES_XSPR2_H_ + +#include +#include + +#include "wrapper_clblas.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template +class TestXspr2 { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 2; } + + // The list of arguments relevant for this routine + static std::vector GetOptions() { + return {kArgN, + kArgLayout, kArgTriangle, + kArgXInc, kArgYInc, + kArgAPOffset, kArgXOffset, kArgYOffset, + kArgAlpha}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments &args) { + return args.n * args.x_inc + args.x_offset; + } + static size_t GetSizeY(const Arguments &args) { + return args.n * args.y_inc + args.y_offset; + } + static size_t GetSizeAP(const Arguments &args) { + return ((args.n*(args.n+1)) / 2) + args.ap_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments &args) { + args.ap_size = GetSizeAP(args); + args.x_size = GetSizeX(args); + args.y_size = GetSizeY(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments &args) { return args.n; } + static size_t DefaultLDB(const Arguments &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector; + static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + 
static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Spr2(args.layout, args.triangle, + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.ap_mat(), args.ap_offset, + &queue_plain, &event); + clWaitForEvents(1, &event); + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXspr2(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.ap_mat(), args.ap_offset, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + + // Describes how to download the results of the computation (more importantly: which buffer) + static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector result(args.ap_size, static_cast(0)); + buffers.ap_mat.Read(queue, args.ap_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments &args) { return args.ap_size - args.ap_offset; } + static size_t ResultID2(const Arguments &) { return 1; } // N/A for this routine + static size_t GetResultIndex(const Arguments &args, const size_t id1, const size_t) { + return id1 + args.ap_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments &args) { + return 5 * ((args.n*(args.n+1)) / 2); + } + static size_t GetBytes(const Arguments &args) { + return ((args.n*(args.n+1)) + 2 * args.n) * sizeof(T); + } +}; + +// 
================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XSPR2_H_ +#endif From 7468e2ba9db068ef7a11b11d12ac66752ea96713 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 6 Mar 2016 16:32:38 +0100 Subject: [PATCH 40/50] Adjusted the correctness-test error margins --- test/correctness/tester.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/correctness/tester.cc b/test/correctness/tester.cc index d9836500..8169f700 100644 --- a/test/correctness/tester.cc +++ b/test/correctness/tester.cc @@ -280,21 +280,21 @@ bool TestSimilarity(const T val1, const T val2) { const auto difference = std::fabs(val1 - val2); // Set the allowed error margin for floating-point comparisons - constexpr auto kErrorMarginRelative = 1.0e-2; - constexpr auto kErrorMarginAbsolute = 1.0e-10; + constexpr auto kErrorMarginRelative = T{0.025}; + constexpr auto kErrorMarginAbsolute = T{1.0e-6}; // Shortcut, handles infinities if (val1 == val2) { return true; } // The values are zero or very small: the relative error is less meaningful - else if (val1 == 0 || val2 == 0 || difference < static_cast(kErrorMarginAbsolute)) { - return (difference < static_cast(kErrorMarginAbsolute)); + else if (val1 == 0 || val2 == 0 || difference < kErrorMarginAbsolute) { + return (difference < kErrorMarginAbsolute); } // Use relative error else { const auto absolute_sum = std::fabs(val1) + std::fabs(val2); - return (difference / absolute_sum) < static_cast(kErrorMarginRelative); + return (difference / absolute_sum) < kErrorMarginRelative; } } From fb58129afbbb4ff758924b9187cac4c954cafd0f Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 6 Mar 2016 16:34:26 +0100 Subject: [PATCH 41/50] Made testing against clBLAS in the client binaries truely optional (was partly implemented before) --- test/performance/client.cc | 65 +++++++++++++++++++++++--------------- test/performance/client.h 
| 6 ++-- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/test/performance/client.cc b/test/performance/client.cc index ce97d273..ebfad3a6 100644 --- a/test/performance/client.cc +++ b/test/performance/client.cc @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -112,7 +113,7 @@ template void Client::PerformanceTest(Arguments &args, const SetMetric set_sizes) { // Prints the header of the output table - PrintTableHeader(args.silent, options_); + PrintTableHeader(args); // Initializes OpenCL and the libraries auto platform = Platform(args.platform_id); @@ -162,11 +163,16 @@ void Client::PerformanceTest(Arguments &args, const SetMetric set_sizes) auto buffers = Buffers{x_vec, y_vec, a_mat, b_mat, c_mat, ap_mat, dot}; // Runs the routines and collects the timings + auto timings = std::vector>(); auto ms_clblast = TimedExecution(args.num_runs, args, buffers, queue, run_routine_, "CLBlast"); - auto ms_clblas = TimedExecution(args.num_runs, args, buffers, queue, run_reference_, "clBLAS"); + timings.push_back(std::pair("CLBlast", ms_clblast)); + if (args.compare_clblas) { + auto ms_clblas = TimedExecution(args.num_runs, args, buffers, queue, run_reference_, "clBLAS"); + timings.push_back(std::pair("clBLAS", ms_clblas)); + } - // Prints the performance of both libraries - PrintTableRow(args, ms_clblast, ms_clblas); + // Prints the performance of the tested libraries + PrintTableRow(args, timings); // Makes the jump to the next step ++s; @@ -213,20 +219,27 @@ double Client::TimedExecution(const size_t num_runs, const Arguments &ar // Prints the header of the performance table template -void Client::PrintTableHeader(const bool silent, const std::vector &args) { - if (!silent) { - for (auto i=size_t{0}; i | <-- clBLAS --> |\n"); +void Client::PrintTableHeader(const Arguments& args) { + + // First line (optional) + if (!args.silent) { + for (auto i=size_t{0}; i"); + if (args.compare_clblas) { fprintf(stdout, " | <-- clBLAS -->"); } + 
fprintf(stdout, " |\n"); } - for (auto &argument: args) { fprintf(stdout, "%9s;", argument.c_str()); } - fprintf(stdout, "%9s;%9s;%9s;%9s;%9s;%9s\n", - "ms_1", "GFLOPS_1", "GBs_1", "ms_2", "GFLOPS_2", "GBs_2"); + + // Second line + for (auto &option: options_) { fprintf(stdout, "%9s;", option.c_str()); } + fprintf(stdout, "%9s;%9s;%9s", "ms_1", "GFLOPS_1", "GBs_1"); + if (args.compare_clblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_2", "GFLOPS_2", "GBs_2"); } + fprintf(stdout, "\n"); } // Print a performance-result row template -void Client::PrintTableRow(const Arguments& args, const double ms_clblast, - const double ms_clblas) { +void Client::PrintTableRow(const Arguments& args, + const std::vector>& timings) { // Creates a vector of relevant variables auto integers = std::vector{}; @@ -261,14 +274,6 @@ void Client::PrintTableRow(const Arguments& args, const double ms_clblas else if (o == kArgBeta) { strings.push_back(ToString(args.beta)); } } - // Computes the GFLOPS and GB/s metrics - auto flops = get_flops_(args); - auto bytes = get_bytes_(args); - auto gflops_clblast = (ms_clblast != 0.0) ? (flops*1e-6)/ms_clblast : 0; - auto gflops_clblas = (ms_clblas != 0.0) ? (flops*1e-6)/ms_clblas: 0; - auto gbs_clblast = (ms_clblast != 0.0) ? (bytes*1e-6)/ms_clblast : 0; - auto gbs_clblas = (ms_clblas != 0.0) ? 
(bytes*1e-6)/ms_clblas: 0; - // Outputs the argument values for (auto &argument: integers) { if (!args.no_abbrv && argument >= 1024*1024 && IsMultiple(argument, 1024*1024)) { @@ -285,10 +290,20 @@ void Client::PrintTableRow(const Arguments& args, const double ms_clblas fprintf(stdout, "%9s;", argument.c_str()); } - // Outputs the performance numbers - fprintf(stdout, "%9.2lf;%9.1lf;%9.1lf;%9.2lf;%9.1lf;%9.1lf\n", - ms_clblast, gflops_clblast, gbs_clblast, - ms_clblas, gflops_clblas, gbs_clblas); + // Loops over all tested libraries + for (const auto& timing : timings) { + + // Computes the GFLOPS and GB/s metrics + auto flops = get_flops_(args); + auto bytes = get_bytes_(args); + auto gflops = (timing.second != 0.0) ? (flops*1e-6)/timing.second : 0; + auto gbs = (timing.second != 0.0) ? (bytes*1e-6)/timing.second : 0; + + // Outputs the performance numbers + if (timing.first != "CLBlast") { fprintf(stdout, ";"); } + fprintf(stdout, "%9.2lf;%9.1lf;%9.1lf", timing.second, gflops, gbs); + } + fprintf(stdout, "\n"); } // ================================================================================================= diff --git a/test/performance/client.h b/test/performance/client.h index 9f6852d0..5805b8a5 100644 --- a/test/performance/client.h +++ b/test/performance/client.h @@ -23,6 +23,7 @@ #include #include +#include // The libraries to test #include @@ -64,10 +65,11 @@ class Client { Queue &queue, Routine run_blas, const std::string &library_name); // Prints the header of a performance-data table - void PrintTableHeader(const bool silent, const std::vector &args); + void PrintTableHeader(const Arguments& args); // Prints a row of performance data, including results of two libraries - void PrintTableRow(const Arguments& args, const double ms_clblast, const double ms_clblas); + void PrintTableRow(const Arguments& args, + const std::vector>& timings); // The routine-specific functions passed to the tester const Routine run_routine_; From 
f4c09220c196f09a5494378d39e8d7c626e6d377 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 6 Mar 2016 16:43:28 +0100 Subject: [PATCH 42/50] Fixed a bug in the GER-family of routines due to incorrect division of the workgroup size --- src/routines/level2/xger.cc | 4 ++-- src/routines/level2/xher.cc | 4 ++-- src/routines/level2/xher2.cc | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc index 0953c8bb..d8fb6b03 100644 --- a/src/routines/level2/xger.cc +++ b/src/routines/level2/xger.cc @@ -85,8 +85,8 @@ StatusCode Xger::DoGer(const Layout layout, kernel.SetArgument(12, static_cast(a_is_rowmajor)); // Launches the kernel - auto a_one_ceiled = CeilDiv(Ceil(a_one, db_["WGS1"]), db_["WPT"]); - auto a_two_ceiled = CeilDiv(Ceil(a_two, db_["WGS2"]), db_["WPT"]); + auto a_one_ceiled = Ceil(CeilDiv(a_one, db_["WPT"]), db_["WGS1"]); + auto a_two_ceiled = Ceil(CeilDiv(a_two, db_["WPT"]), db_["WGS2"]); auto global = std::vector{a_one_ceiled, a_two_ceiled}; auto local = std::vector{db_["WGS1"], db_["WGS2"]}; status = RunKernel(kernel, global, local); diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc index 5eca44b0..c025117b 100644 --- a/src/routines/level2/xher.cc +++ b/src/routines/level2/xher.cc @@ -95,8 +95,8 @@ StatusCode Xher::DoHer(const Layout layout, const Triangle triangle, kernel.SetArgument(9, static_cast(is_rowmajor)); // Launches the kernel - auto global_one = CeilDiv(Ceil(n, db_["WGS1"]), db_["WPT"]); - auto global_two = CeilDiv(Ceil(n, db_["WGS2"]), db_["WPT"]); + auto global_one = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS1"]); + auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]); auto global = std::vector{global_one, global_two}; auto local = std::vector{db_["WGS1"], db_["WGS2"]}; status = RunKernel(kernel, global, local); diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cc index 63144f77..bfa84d18 100644 --- 
a/src/routines/level2/xher2.cc +++ b/src/routines/level2/xher2.cc @@ -87,8 +87,8 @@ StatusCode Xher2::DoHer2(const Layout layout, const Triangle triangle, kernel.SetArgument(12, static_cast(is_rowmajor)); // Launches the kernel - auto global_one = CeilDiv(Ceil(n, db_["WGS1"]), db_["WPT"]); - auto global_two = CeilDiv(Ceil(n, db_["WGS2"]), db_["WPT"]); + auto global_one = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS1"]); + auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]); auto global = std::vector{global_one, global_two}; auto local = std::vector{db_["WGS1"], db_["WGS2"]}; status = RunKernel(kernel, global, local); From 83c6a517659ee1d005da32a7593e8b5fdb7827ee Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 12 Mar 2016 15:10:35 +0100 Subject: [PATCH 43/50] Added tuning results for the ARM Mali-T628 GPU --- include/internal/database/copy.h | 18 ++++++++++++++++ include/internal/database/pad.h | 24 ++++++++++++++++++++++ include/internal/database/padtranspose.h | 24 ++++++++++++++++++++++ include/internal/database/transpose.h | 24 ++++++++++++++++++++++ include/internal/database/xaxpy.h | 24 ++++++++++++++++++++++ include/internal/database/xdot.h | 24 ++++++++++++++++++++++ include/internal/database/xgemm.h | 24 ++++++++++++++++++++++ include/internal/database/xger.h | 26 +++++++++++++++++++++++- 8 files changed, 187 insertions(+), 1 deletion(-) diff --git a/include/internal/database/copy.h b/include/internal/database/copy.h index 820e0f04..42c9c21c 100644 --- a/include/internal/database/copy.h +++ b/include/internal/database/copy.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::CopySingle = { { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",4} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",4} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, 
"Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",2} } }, @@ -127,6 +133,12 @@ const Database::DatabaseEntry Database::CopyDouble = { { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, @@ -173,6 +185,12 @@ const Database::DatabaseEntry Database::CopyComplexDouble = { { "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + { "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",8}, {"COPY_WPT",1} } }, diff --git a/include/internal/database/pad.h b/include/internal/database/pad.h index c08a60d6..8e8ae966 100644 --- a/include/internal/database/pad.h +++ b/include/internal/database/pad.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::PadSingle = { { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",16}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, @@ -76,6 +82,12 
@@ const Database::DatabaseEntry Database::PadComplexSingle = { { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",2} } }, @@ -129,6 +141,12 @@ const Database::DatabaseEntry Database::PadDouble = { { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",2} } }, + { "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, @@ -175,6 +193,12 @@ const Database::DatabaseEntry Database::PadComplexDouble = { { "default", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + { "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } }, diff --git a/include/internal/database/padtranspose.h b/include/internal/database/padtranspose.h index 5a89869f..6e210e84 100644 --- a/include/internal/database/padtranspose.h +++ b/include/internal/database/padtranspose.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::PadtransposeSingle = { { "default", { {"PADTRA_PAD",0}, 
{"PADTRA_TILE",16}, {"PADTRA_WPT",4} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } }, @@ -76,6 +82,12 @@ const Database::DatabaseEntry Database::PadtransposeComplexSingle = { { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, @@ -129,6 +141,12 @@ const Database::DatabaseEntry Database::PadtransposeDouble = { { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } }, @@ -175,6 +193,12 @@ const Database::DatabaseEntry Database::PadtransposeComplexDouble = { { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } }, diff --git 
a/include/internal/database/transpose.h b/include/internal/database/transpose.h index 1948e0d6..005a6921 100644 --- a/include/internal/database/transpose.h +++ b/include/internal/database/transpose.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::TransposeSingle = { { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } }, @@ -76,6 +82,12 @@ const Database::DatabaseEntry Database::TransposeComplexSingle = { { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + { "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } }, @@ -123,6 +135,12 @@ const Database::DatabaseEntry Database::TransposeDouble = { { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, @@ -169,6 +187,12 @@ const Database::DatabaseEntry Database::TransposeComplexDouble = { { "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, 
{"TRA_SHUFFLE",1}, {"TRA_WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + { "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } }, diff --git a/include/internal/database/xaxpy.h b/include/internal/database/xaxpy.h index 491dfc68..aa0c2d2f 100644 --- a/include/internal/database/xaxpy.h +++ b/include/internal/database/xaxpy.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::XaxpySingle = { { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, + { "default", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",512}, {"WPT",1} } }, @@ -76,6 +82,12 @@ const Database::DatabaseEntry Database::XaxpyComplexSingle = { { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, + { "default", { {"VW",1}, {"WGS",256}, {"WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",4}, {"WGS",256}, {"WPT",1} } }, @@ -129,6 +141,12 @@ const Database::DatabaseEntry Database::XaxpyDouble = { { "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",2}, {"WGS",128}, {"WPT",2} } }, + { "default", { {"VW",2}, {"WGS",128}, {"WPT",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS",1024}, {"WPT",1} } }, @@ -175,6 +193,12 @@ const Database::DatabaseEntry Database::XaxpyComplexDouble = { { 
"default", { {"VW",1}, {"WGS",128}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",1}, {"WGS",64}, {"WPT",8} } }, + { "default", { {"VW",1}, {"WGS",64}, {"WPT",8} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",8}, {"WGS",128}, {"WPT",1} } }, diff --git a/include/internal/database/xdot.h b/include/internal/database/xdot.h index 31d0f84b..b741e317 100644 --- a/include/internal/database/xdot.h +++ b/include/internal/database/xdot.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::XdotSingle = { { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",128} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",1}, {"WGS1",128}, {"WGS2",256} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",256} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS1",512}, {"WGS2",1024} } }, @@ -76,6 +82,12 @@ const Database::DatabaseEntry Database::XdotComplexSingle = { { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",32} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",1}, {"WGS1",128}, {"WGS2",512} } }, + { "default", { {"VW",1}, {"WGS1",128}, {"WGS2",512} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, @@ -129,6 +141,12 @@ const Database::DatabaseEntry Database::XdotDouble = { { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",128} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",1}, {"WGS1",64}, {"WGS2",512} } }, + { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",512} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS1",512}, {"WGS2",512} } }, @@ -175,6 +193,12 @@ const Database::DatabaseEntry Database::XdotComplexDouble = { { "default", { {"VW",1}, {"WGS1",64}, {"WGS2",32} 
} }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"VW",1}, {"WGS1",32}, {"WGS2",64} } }, + { "default", { {"VW",1}, {"WGS1",32}, {"WGS2",64} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"VW",1}, {"WGS1",256}, {"WGS2",1024} } }, diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h index 833e2c7a..45efa397 100644 --- a/include/internal/database/xgemm.h +++ b/include/internal/database/xgemm.h @@ -23,6 +23,12 @@ const Database::DatabaseEntry Database::XgemmSingle = { { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",8}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",8}, {"VWN",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"KWG",32}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",2} } }, @@ -76,6 +82,12 @@ const Database::DatabaseEntry Database::XgemmComplexSingle = { { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",8} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",8}, {"VWN",1} } }, + { "default", { 
{"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",8}, {"VWN",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, @@ -129,6 +141,12 @@ const Database::DatabaseEntry Database::XgemmDouble = { { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",1}, {"VWN",4} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",8}, {"VWN",2} } }, + { "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",8}, {"VWN",2} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } }, @@ -175,6 +193,12 @@ const Database::DatabaseEntry Database::XgemmComplexDouble = { { "default", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",8}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, 
{"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",8}, {"VWN",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",4} } }, diff --git a/include/internal/database/xger.h b/include/internal/database/xger.h index 1f7730b1..ccc7a06f 100644 --- a/include/internal/database/xger.h +++ b/include/internal/database/xger.h @@ -22,6 +22,12 @@ const Database::DatabaseEntry Database::XgerSingle = { { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"WGS1",64}, {"WGS2",4}, {"WPT",4} } }, + { "default", { {"WGS1",64}, {"WGS2",4}, {"WPT",4} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } }, @@ -52,6 +58,12 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { { "default", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"WGS1",128}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",128}, {"WGS2",1}, {"WPT",1} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, @@ -66,7 +78,7 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",1} } }, + { "default", { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } }, } }, } @@ -82,6 +94,12 @@ const Database::DatabaseEntry Database::XgerDouble = { { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } }, + { "default", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } }, + } + }, 
{ // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } }, @@ -106,6 +124,12 @@ const Database::DatabaseEntry Database::XgerComplexDouble = { { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { {"WGS1",64}, {"WGS2",2}, {"WPT",4} } }, + { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",4} } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } }, From 801218ba10fd2b849fc27b81d8b0922b90276c5d Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 12 Mar 2016 16:04:23 +0100 Subject: [PATCH 44/50] Added performance graphs for Intel Iris and Radeon M370X --- doc/performance/{Iris => Intel_Iris}/SAXPY.pdf | Bin doc/performance/{Iris => Intel_Iris}/SGEMM.pdf | Bin doc/performance/{Iris => Intel_Iris}/SGEMV.pdf | Bin doc/performance/{Iris => Intel_Iris}/SSYMM.pdf | Bin doc/performance/{Iris => Intel_Iris}/SSYRK.pdf | Bin doc/performance/Intel_IrisPro/SAXPY.pdf | Bin 0 -> 13361 bytes doc/performance/Intel_IrisPro/SGEMM.pdf | Bin 0 -> 13099 bytes doc/performance/Intel_IrisPro/SGEMV.pdf | Bin 0 -> 13626 bytes doc/performance/Intel_IrisPro/SSYMM.pdf | Bin 0 -> 12562 bytes doc/performance/Intel_IrisPro/SSYRK.pdf | Bin 0 -> 12898 bytes doc/performance/Radeon_M370X/SAXPY.pdf | Bin 0 -> 13342 bytes doc/performance/Radeon_M370X/SGEMM.pdf | Bin 0 -> 13227 bytes doc/performance/Radeon_M370X/SGEMV.pdf | Bin 0 -> 13701 bytes doc/performance/Radeon_M370X/SSYMM.pdf | Bin 0 -> 13228 bytes 14 files changed, 0 insertions(+), 0 deletions(-) rename doc/performance/{Iris => Intel_Iris}/SAXPY.pdf (100%) rename doc/performance/{Iris => Intel_Iris}/SGEMM.pdf (100%) rename doc/performance/{Iris => Intel_Iris}/SGEMV.pdf (100%) rename doc/performance/{Iris => Intel_Iris}/SSYMM.pdf (100%) rename doc/performance/{Iris => Intel_Iris}/SSYRK.pdf (100%) create mode 100644 
doc/performance/Intel_IrisPro/SAXPY.pdf create mode 100644 doc/performance/Intel_IrisPro/SGEMM.pdf create mode 100644 doc/performance/Intel_IrisPro/SGEMV.pdf create mode 100644 doc/performance/Intel_IrisPro/SSYMM.pdf create mode 100644 doc/performance/Intel_IrisPro/SSYRK.pdf create mode 100644 doc/performance/Radeon_M370X/SAXPY.pdf create mode 100644 doc/performance/Radeon_M370X/SGEMM.pdf create mode 100644 doc/performance/Radeon_M370X/SGEMV.pdf create mode 100644 doc/performance/Radeon_M370X/SSYMM.pdf diff --git a/doc/performance/Iris/SAXPY.pdf b/doc/performance/Intel_Iris/SAXPY.pdf similarity index 100% rename from doc/performance/Iris/SAXPY.pdf rename to doc/performance/Intel_Iris/SAXPY.pdf diff --git a/doc/performance/Iris/SGEMM.pdf b/doc/performance/Intel_Iris/SGEMM.pdf similarity index 100% rename from doc/performance/Iris/SGEMM.pdf rename to doc/performance/Intel_Iris/SGEMM.pdf diff --git a/doc/performance/Iris/SGEMV.pdf b/doc/performance/Intel_Iris/SGEMV.pdf similarity index 100% rename from doc/performance/Iris/SGEMV.pdf rename to doc/performance/Intel_Iris/SGEMV.pdf diff --git a/doc/performance/Iris/SSYMM.pdf b/doc/performance/Intel_Iris/SSYMM.pdf similarity index 100% rename from doc/performance/Iris/SSYMM.pdf rename to doc/performance/Intel_Iris/SSYMM.pdf diff --git a/doc/performance/Iris/SSYRK.pdf b/doc/performance/Intel_Iris/SSYRK.pdf similarity index 100% rename from doc/performance/Iris/SSYRK.pdf rename to doc/performance/Intel_Iris/SSYRK.pdf diff --git a/doc/performance/Intel_IrisPro/SAXPY.pdf b/doc/performance/Intel_IrisPro/SAXPY.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3a51f306674bc047e754793c0c5c24132cf514b5 GIT binary patch literal 13361 zcmb8W1yo$YvNk$E@BqOzFa#&SFgOGYPJrO<41)w2+=6?60KqM|yE_DedypW(T?4@# z{vk*1`Okgtt@UQDy}I|;)m6Qlp{r{4S9D4eQmkM$E(|)qHNSPgF~3PY2MjQf9SAe| zfFUS|0TOqB8ar9SY$c4Hpg=|m0S^ie{I2@ASMi9VGO9P>C-AW{=FI? 
zNX^|23IvH8I~iNU%z+>!V{@n@kP|MT0{ou|y#H&$liZWE1Ju^(pW+~Kn5`51))C0} zcREM~>IidofWY-}{g=Kcfq!j>Jk-|Q$pXmF!N&&#Nm*Ju!JhymWewjP2`B_+3Vm9) zqZ543Y%tu?>h$$v>GFo+x7QaJQ8K@@co8v1#dEI`(1emh^bM zIH^c*Eu1{`oo_lMkOR`&Kg1dLf`}#&B5VU=+2(_74j`K`xssWy#b5Mk=C&D4+~;`zix z*u&MfhPr6y$!8=0B<8J7mm#3q~9*zs9X)6LZ zqS5bGjVJ4*S|2qH@5U0RiW5Og-naJ|ONPRpmp+CQf9Q|9HfIc5Z_g7AAGS_L#uOt< z_R$jMA4I{o=7#rZL3gVyk9S@3mx)KhdJpTrFPFc0DMHK3;&b~eUs}C(KnpCXr5!l_ zRjqcppK&)Ya(}z&$K0mhesy$zd-Pyc(EPA_N$ldKF|TUVRQlUw-!!9tU@``>fp7Ij zhfX>Eq+b6Nz&vBMn@EA$fzsYm#W_}cg|79{SqBr#AQf5icjt)Y{W#`s)s9S}JZ5MN zVclt+#tkNpn~}pRaXpr8BCAQDy)EJBvm^@NfnfrPHE_IXvfg1tUAd*h za2-?yrYCN!6lC^IWPSBf;x4a7GAvpSjYPr!7ZSU+g~YR7)tdw{4y=|;q9FU$7c1Kx zN%EoL4K7xW!w+mevex4?ddlcXFv6*P+qXrei?T##%9hc3<0F+?X3M`bI}dWO7y4^d z_j5hmI~cQWN*Y^cD=p?0wR%MKydQ&malN4VV+8^ttyxR?iYoT5Revy+KdX12e&s=( zB2G46?MgMMvQxI_{gS}R#pue(gFA*w_G9XlBoP-mkk!~Y=j6~Ki*!zl=wobCgc8`< zA$*67Ip79OTxP>-5cxL1>sQGzG&*FEi@GpGMiOe|Se@Le*pyxP7bl=3CpjdaK^j?> zU((O-G!z&zfpH}I@$?3U3bI=Y{J}Ha^coaIzqG7?qb{~`q=lk7gY0vI?6-LlGQTM- z#H<%KacCKaE#qAik)om(e=BTp6n=FVA#o#{$nDzUcF*8wqG+A@t^NdC-XwN(`3{PT zsKs6~#TLwIX?%=tdS2_eT_l&G01XAH!=;4;{(*il|J6-^XyQ<#hH@?*Nq?kD|1IKp z?Sk2nbZV4Nxe4JgdN9))Ep^~F4wKy{;02e!b z0j;%248U#0@?7$>ao{xXhL)pS&is!y!K){ zb}=-jn%B~TZu`|5bcieQ?uF0EaTbuYzR--scZSX3K3`0FLJvL2!hibpb}*dY^j^iu zkBHP0mTG-d^y;SI^Cb*seq#>R!eFIyOf$-ZYvtt{LMRPe83?fFXiXu*ux;`R|CnF1hN6tqp}%7 zj#N@8dT_&jJKYhoHwuw&0;UhX*81d~aUnIWJKsBn3TQ*rr^Tz zL~%L7=>-{ApPt!On%p1W%tEVb{MFaCyxCETX@AQhy&gYO9b06A3btv6K`UQp^_nmX ze8+{+cM%F<>0YF{1zo3kd>T^y$mzz`D96mUIU1S1(R-`&#Wm4VTL%&TuT$+gpej5r zmq2E}j9huL*L$b1*C($?xlC+TTjkZcBp4${4T?c~-5lL7a^ItQ3ls75NeP@rM4O2Z zdT@E%BNn;#pd(R{#hj1ulMZ-8j}k+A(Wve3nZ6UM-qS_(3UnMIO;n4?FC{blU7Yp< z7y`cnXicglWQV_Z!Tam9eJM>n3=we~XA?^3C`)n>x|@S=f4YkUxomhZb~Q$AlA1yV zoi_3~Mi(=d*i4@<%rfi51pJHI!sMj3pZaAsJ=`r?zL8LnH(;p@)c<>1c#DU`?p`Ij zm{|A;?kp3$r+#YQIvf5ciqHtN2+X-PNI)!rLDe5k4Nuivb1z?Y z4ANE!%Mc!ou{@Qq8lAK!@>pCB%$sJ=Jy%B{cC&ew_7OQ@y%r;c^n@p($6puwDhcKC 
zp`-RAZnGdd2_&XEnwjbqJotI01kE-svvRp2tJ*~TBQWesAodRr0&6!;*jDmv7oejzmgja59FNx{ z$I%*t?1c%tgjx+&F1thC!FI2HO)!HZ146RinU5lRSEChjhCGX`l_8C9r*kd`qJ}cJ zOD1V!i3{Po$i^2T8SEI(Brw-#FdwOo>aEmjCcE}=Wr~_-t<9?DQ#O%HE)}r+On*AV zbJ+7FG2H`^We?>|KDD;UxAc`&vs}>UL_`$PP!pD;8e~!vmxm0gFYRAAPX}nw41Z9k z@8+C`_|&%ozM zHCk`^h!QdHI)(q?9g}|+e=jH8Lp7@^uC$~tP#9AC7@Hh~O#xnkl4zBMdGF94vGpmq z+0^jPiLqZI%-+8x0g&_x7=Qr1OCtLXWWETM)e;k=9f)1AcsHI)nP`~pOuh(2#Sw2& zDkGIqM^kBb8Pml%ze-mlYGJlIEj1&~Cb+l9tpcPv5Nw-Bd_ND@&?y3t1Z6J;D^-n> zTlxUcmk@C8%K_7Lzyk1N_Q4uYZvItBWEH{7j^evKupo7@hFhSGF=EniQyf-@KuyKq zi}>$y;0Tn+YffLRRag!Z$=EzwX9*%Zr4=74ptk_g|BM1)jSoVjp-#vzblQjZHBVb})due&8Yz6|c>A5|X|jG46YO`EZ(vn3m<01}fO_JTK0yAwWfu(2MruZ! zU}q=7K#fKIqJUayykdO=T&@R@Y{{&`aAQK&6Wo#6r;o}DoOK*SYh5lzB8=8SzcFizDC1ODq2cMmsYL1lf~?Mb)w~9c^eTDML5ARe%Me zUX>X>C|9{&CE18Rlq9(>%f~7K_YqBDmO6#4I$A5eEN1WvU{w;-A0C8awfNn?Av`F^ zWq2(%$mIhQ2 zeq^LQC?@){{&3U#?tbxYfc{b${n!&-%WS)kE-lL6iKTjd9q$D0gSwX}6YYe_rpAfy z?a|LY(YcqpV{-#fA=ME3dpC*mr;g=)cc_2}J?hQcaVXh#--f86V0tPI1rcnlV$h%| z)PmKk+)ZWVda^H8XBE>UYF+l57YHFwc{S%2w1sda-)Okg~^0)x6!fiqy9oyB*p zbyUhIpKNppcYRb1S*g`XP{IHi=3iMOe$o`UJ)T^M47CXM8m;VdyOX7kb#N257URuuT+rV=ZvkB|J z{%C3x4IRBk8oEOB%k)N`$Lt4uB^*CS14#ROXNpn!BGr}(7U~SlDmbV#q~Gu!mY@+; z1O&Z@D=7b-(B$>?K)b-Dw>JM9Sa+(3N{8J&<3to-^iRc}df?;o#0_gzKOCtRz0J(l z`5AY+bPbmS!DdRC!}#?mZ{VWon-1FxdPh==ZCe()c!aT72CLob>NHz;?gr**iqBck ziDGGRaq?^|UGMyzcnmK5iksEPDslE3aNd~Q+9G%r(+l6DrwmK^d*pC?RDW&J;B8!N zVBUfO6q9-p)#x%L`h$5wY5=2MUOYVw)s?v@J|LFn_pFy03cE|+3gmPPkPs+qHvJ6H zOFi2g!NY<_rROWR9ZST@y;@Q8)vv?EN9Kx)4w)VM&1Avb|3WmP;30W#q-@fMMBs;%w0Bul zahl+Vh?f%Hr8KhUq5WM6!>D+>CxORmobb(Ssg6J#X7r)fBUqFLnV8se88xj>Y;d&v z4qinT0(@{GD$+TGC;H{<#m-+(mGkPFc}cgy-3v_D?j^)*`SIM6q- z$~#PD5^?xKB{~%ZmymqU$q(5z!t2Nme0$Mb!*wnVyf0(Z6xx>Nx!dyjQa##YtvnR$ zc+NeRr_WNBtoJtA%J#XN=B9>(v}de7aL(+%)SA>asVSPdEz6&&HB$w-YU)?Wj^rC0 z<~19rrYN*6@H@BBbESGTDeieFm~Y*PYmS+k-8(&(bVx=|?TyV$$H|hR=ftiV%Bjr{ zbJY`!)-!7u%P1)3PMxzs*iQa1V&S6NF&TTki%6UIgg_WlCJ*X;PqXk9KDg+1T=##X 
z=V@VG@m!JMOkE_h6SmDR8nMOXqv9D~Fl#_gqY6z~eiElq#!rI}d((ahu1Q5q?+s?a z{6S#efK2gICguZoO+_85)fa~pS)gi`*g>`#3W z8w?Q4fq{-W;%8C3Ne=bMBOCq{uY(?6H5u%z>Rq8@kpT_mdp^2TVajiD)-cxu(eic~ z@&$y^^9Ja}e&>-E1|$wAAP+?PH)sqIs~#)XVy~eD4xg~*{nG6ZN5piubyNX+4y|zP zQyWG+^qltW;3FWj!;j#Hu(dHRgD2k^(;!ZrJM9R zQ%+BLg1-zJ8&i@dV;UW$;MB*3R@N3(e4i}m_gSj?4cfC;fmlVIu|T_ zuE8Bs;#H$fggAhSb4}hh|3r9`jkjvSV3W=;!Y6zyC2)KCH~QTI(eiN*WoF%;zfO3GiFxt40;5}>Z|B78IxrU^_bC-I!T#jx=?+bBZQRfCgmV`OUDGtJ(+x&i5fLuQILD8io;sZrFk zFQ?3Q8uprAGpGfDRa37yAbxBE`j$5WAR&RX9TL^|7T?3(#z%r@K#<|gzTS`%DPiH~ z#nzrl-Y==Jg(OdUm>alOmokg{2k>SJ+y%O_82UH$HZzRyv6TonC}VbOXsx}4>qwnP zQ|9Emsjz)^R9(tMGg2Bbgj1#?1iVK+&}SD=mRv_q4tJZYrdaB;K(v{f3%{t;eJIDu zVEn30S}|nCqj>gc#8G2zLfO1^6`_nXghPb$X*i+(dzs%BPB^dSu-T6I^Mu(DZMQ$- ztY&9jOM9z0Ra$FQeI5$oio^&5yFt`1M$j&@NkWBgV}PN;x7m#Fo*GGa`L+(Dxb49R zU;pPNl_UiouRm{zX2SW-`JXqcPnI<9oHLi~av#r5@d}@pddc7{8TIW^2kkK z@|6H(`sk%#nPuJ9*X4Z(mDkn7ePZG_WXS#E(UN!L z-NTR^BJ175N#YP}`Tp=^rX^O|O9zqM8T8t(?2cPKb^mrBv~=J2=#b!6EM%C${Ep{E z+IvR7sQjS2E%cCRU|ckY?uNp9=06X3y^;H}SiglPYI`OyHRKQ^3I=vzFXI(V6w&rLE#wzS>r5XI z1y$r4e(k=!gJe8AiEckLyhNTv25<#@qG?SbVv(e4maf*(kHVFCo4Ymx-@*kfV{MJv1!}DAMnT~MB(h-Kh14r(Lh4_5iYR_>q|?-7J`deB0nXUH)GQPnw(K6UV_g$JMxQ#F!8i&qXWa}5 zcvjt)c8odT?-a!2JeAB~Co%qhJSv`)2Api6JLn`YP&N>IW+l8SjdArs_LMn;nC9@s zS(({17$862x2ej@gP+F7WDw10g~IEbiu=5Lm@${JJ9-T_~c{+#&vvVMso=i`w@KD@Bpzf7C znMW-Adpm^;aBJwVBM=Gt&F8uWA1lOel(t*%ML-&+Io_3$6o`t|(*5vT%JiGkC#Nui)X``L?nKl}b?4xHC69 zkVQBb0|xmeXi0vGu3a;*|DC+CYi?sC#PptR1~j+vlcC=$168*;Dl56UJ7CrZIGNxc zJfruWUiR4e_foc-(f4Rf{sn0T+uYPAe8~OTn5X>3@Q;QKjK;lYbz@{8kbzb%uE3&F6a+xt2e@=>XI!o8DGk$7g&j%6+` zbf$7%6Vi6|7AlMoQ7bp$tXJjfW9GrFb3*F!H&`tLY^ZO(aQW{6!o-P*;cQI@Un3=}kxY!afI=VaiJ}hWS zHZ7A?WnswljP@~cQU$5FyOLffHA&R*CJz-hf7{s4mih6c>QWQ_du_ZS!0}eV2lshE zq45hS`f@|(ZkReewA8XHEH-8RPk8BqukaUM-E_NBXjP54-#Wgl3H(+UGX>;>E8?_)%P>i^Ii(7j&oj-xo$OA@@o8iueG{%4h@>hx zVM)jvGSQ2^=kb@?Sv>Q$)SL(wl=2F)xipnig38>UuLNzDO zkZv+6Nb#A^Y|1lXsSS6E;%yGTWN{-I7sJI;lGny;4*khwa=;&E9}G`FDRAuVnFXhz 
zF-&n27m08f{o+uxNKM5&;lys9+r29(pJYI3+NjWxE3dig&T6f;4Mu^MCfTa7k1_UUioMfz|V0{=m| z9nGupMx3la?Lo)YEp&9C&ExzD79KmB*}}<+d2`Z=`CQ-Ue(Ek~$91Mbc+xP{N7bN* zCo*dfhMpv%mfmxoH%belYNLU*W*Ip9MDk7*M8L1~b*yuumg8U#%SE%oPuF(*5AI7! zSf6>PN{)KTKNR(nr(FDmPq6XYt3Fe-fge_)h=eBmIjI z^$#8tNY2vK5vU7%;ypr)3_VlX$LE<1dTpOXj3&C8?zAC##l z%G19nPH<)xL>1};)CIx0VQ>pjHz$1zkQAIQ1r!tnNpS$dPfAqav?op=_=zg^mwN@0 zbaRqcb$Vin2?{=GhFiCWIjGthL;fNlM-^!?Aost-|A(#h1U;G71<8twiy1pYO@Ta5 zQu_bme?8s(>v7*c5w2{XjDRGdE|w7JlTy%oIHwErPi7VeH$Tt+BQxu1P5)+QeNAvx zQ=KCU8EfNhQ;K;J7Wc!-LIqI zg#{rfSM5I|m6)=(BOp(ZH7z4uZgoEzNnNJ8^tQ@q8(TC#@|tc2MBfDlc z9daC9gZ|lAU|9dmkJldY#fX%68||uzqn#cS;3OL?+6&vLBK3UF17M6tj*Tg7hxZJ#WAlEd zSp&}|lWJ3}IsnYZ?z9QqZAhzMy_x0X=VP~FwP|OZ^3lIbjk$D5FbMQHB$c4L>blVM zPR0UM=0oe|ZbED<2?6^12udyYAtOd0Kv;FVGq#sKWwWP&O7|Lz`RkH}1wA>03@awk z&e2T=a@?S;?btD+i5Icj1ySgfY;W{BVVGse{{|6`uSEy$Q2dpgEJ)&?j3D= zC@4-+Vklo8D4$TL+m9cLoGcbBdXDLwRj|45mJZdsgZ%P0oGcFMQ zqv`7(M_!n{(n!}M;p53kUvJ0!AG|ah&(DR(-ETtI-NDAJxN z`3@3($Fuql+(rP-3KHU5#?WW>M&K2Hf@lN@ATfb&T_#DKf0+?OA&PS*E>{4L9f^}4 zWQB_lmCbL+=-C2-3OjbXFVi24VbK?XLH#cXRYIJA;{6oWL0Zw|8%W6FxJndpXmsLY zGKhqt`XR%hD>D(ycns z?2+-&9e@*y0O{9oHYv@_)E_T*YIbnX_-+VM0M8pImX8uH0V{vguc z2;=-Jtx|z$iv2LG9*0U3Yj8cr#}^;*J|e$~^a`L^bF!=FGt?7&ANsmmeMQHP!A{c- zzmc>RZ|TKR1WwQCpP^&um6RurH=0Of1~nso1bTU(j2N>qYgWt;DL%PIIk&g8c;}(- zyX~@l8-ulE1tkO(Dd^hilx56ld6@iYscoo#Gkkru_^J&UJ0N9>;~=Olza(Z$pFzFM z@J%_BS&R7@;~xTz*Hes6%zDah`JtLzYG-PCrAZZ9S#A>cGRH-D1_Lsm%DRYB|*Br1;hN#ocQh!cUMlr3qMiLf6DsmBScs8O<3XYJtUyP}Qa>=P9W@?L9s$ zan5|?A1ao4wJ?i-S|-A!;MzQW58r*VN=xLy>FI@}#I`l4YRP#)pG(zLH}nY@5g- zio>)R{*dj3=|JugE*dUPi^EBZA8FGQ(s=Z->xF}h;Ej24=nf%BcXo$sCLn$Md9nmIjsJ(N9yJbrszo}u5c+|%B_ylJ~p zMG-)uMnOg?LutS|4rb{h=+Y0q53cwe@!2GThs2pwh(m)Dn|+kmmb=|bjK`i=+ib$f zVT1rO$f3ZwXyY;WoTr9Y4qO5z=Li{jIgGs3G7`f2gXlexSU5^Jxjf!vY;u=1G*V}R zHGmb9wS^@^n^dbu*QRE#;l{!S_SJUL!mBUX1m8l{ViIy%QkKj*Q{Q{>OYcN)xv6B^ zWz5Ug+_tY_9^yKg{(;KmTW9;sLqI@IK!4VV@x*usc)V)*7$v+H17c(L)m7|`JAavr zz}DIHT&MPd=j=5xalTxNT=-Cqd++JhDcSbtroAQyy?y;W+vg1u4Q;k6uKKQ#9|LZY 
zaJ+ERrDOX73yRE(x8IJL8%!I#s=%(8u{OVWcfkbfo*!%J{`ezg4S!Shx8t(r81G!e zPuIb*C0NihlTWQr^drwBIf4$tg721Zk!W8>yr0+G+)U)RKi}f`kN8V=3Q^n181-L7 zx13^aV{W?!`Ty|!@jL7Cdqh`4kQY@XIu2$9x(|slzVeIlZ@$e&1O5rf6|dS}4ZS*& zNS2Vz_Q)a3!Ou?3G3;~ei|dntl$mJu8iY-T;&(guLDqBDQ`ZWJZ&((Z>;yZ7v-nnT zUad>IhqJLvYo)bcUE%f6rO`>_jei1vV*PZ)pOuC(dN$#kS0`7{Xnn|bh@XxMLxsg0 z4T826X`ATd8HC=sC@iH5Ca7E*~Dh1ulPk{JSDs8+zHx z8y&G+ncVAl%djom_N(|2&3HqvUoo}N%Hb^QF^yr3)#?5Dg@%`mnrvers(H%lHb+#5 z^gv|do5X`wC1XwHhVV+cYB?QJy*YiTQeEAim-D-Gna`REAIr>s=l_~%ac`+OIG&x< zC{WkbxwNe?IvVl#b~B2S6CCE1>j;NTfszp$(z*Nxk}g-t{)*V^n2it(WkQ-V*=M_zAk-k^7XwWNx($eR&1zABT=EsV0Rq9Q8Kc=Fq=?%Er%lzm<_D9Phlh zySp(7X_NCfO8X?_SO#i^{jvTrce?F4_I3C2K*W9N{H_XflsMABRR5rP!>-7&!#i^w z^_EHimy&>Q_j(sW+WwXoaxNzJy3^cEt!-xK|JUsm^(XJAV*jnr)eE!R65 zH)Y!mj<#b9+K$9t4IAkbZDF@5m*n1D_f`6PKq28hA%QXLXVtg&+auzE6fZMTa|qrS6KIL9$S)J@j080s*YZZBzf;#lbjtN7DOm!cq3f~ zp5FeisGI-aVJOBw%Mc`O;f#MvTX-G9Q`q`Hcbb;AZ*3hd{{>)xBrMI$pzzv*r{V%# zAQv~t#?sc=5y;O2vUi3#L9L-?P8eV=E|4h;Fd`#^132UxD~P z3NijB1t3*t6Q{p*8BZWQ+jwtm178w6QTV$r|B#V^TDw4fDb8~V7&4B;LIN*uR(+6n# zZw$->PrUwbjGu=aUY+t^7%yDYe`9>y@FJ4`lH=q5-!?eF|4Sb`2mgO#U=D6Rcvkd( z=>v1{asBuDz#RNs@LcU*a!wA$mex=Qcv%k+q-yB_h3^3nBx`F1gQtjp6FL$WS66-6h?jNOyO)+`%vI z|9}6zcda{Xu@3KZp0oEkXJ);7@ArKU^E)XSE+97_HnZQd--_R`-?+X#Hjti&9%gEd zEh>r)khF)GI9kD=QYMZNdNwH$5D!p*hZhI}^6>-t_+DTGlwfB6*FJSCM_c45ReD)_ z6S$=n*nwWf$r0}4h#dCL9%kkQhS)#!@^XW?d6C#(U$6)01-7_2HUw(+FbmMXXG0HA zcY{Od0g@(;CblpOdcZpq3y1?fFOr~2|Gx(a{*M6D1nT%tIzSQzbwpk|&4kYf0`veGD_ck83IH;;$jy<0fMI5khj}|V zBKOP=+cmAqp#M8WFeZIcE6qfM&+F?2c<~X7?-R;vi0ml}FFfGUcs&H3k~L?vPEvg?z3b7;g2C(>KHzpu>$Ih%@lgK^X2|4Y zw?r7%5bWqy`(!}aP_j0nf1~DDyYw67>?*&}4hvcBYVVEBN40P8xP1GPRQ*L3*wQ=0 z#aIiYi(_oTy2g?C(uVi|)@vmJ{v0oF7{NOa*eTdQKeMT=Zf8EZ%30DaB+_q?d9gj$1W~K(zHZ3WYY-Eh zy_ww|uY$brIB;-qo@9fpxI+_w7@ltR{C8^_OC6JkRWU zNx19sSyi=^(K}Lu^F_uH*{()W6lPy>enGRP9(_=7Lqcpeb?#Z+1MKT)h}$786mU1JG=> z+!)p;o_MW~u0Q~D*H_9X6KkO|p#UDd+onHjo+jJ&Drg@`HFOi}%7!;QxZY(7y0Fns zUqPR4o;tHB_upJCPwa@(g>AStub?`4goiBWrP0CsuElO7kERbcMNT~>h!2)LC#6q4 
z=UUv8jqWB$x5Zpg#LsOyMTkm+QtxMy@Qu9g=ZgC!#>w2S2Ja2$Jx_c}xi1e&7xbGw zuMW3%RPHzWwMxBRu9me}Wq(z9fX*vdVqYBf8{J+n{JC=|+8$oG-5NIXak*L_Tx)6h z^XI0tC0F-CIb5~SrCa7{Fz$A!X02}|WzF@8(VttvmO9bK99BcaKVF{qgGQKDmwH8h znqoHlg|u@5U$ORvE{dQwDL?~Hs$55(5Tqh@Yqg9c z=lEs2@`{`GU5%aZw#x(XPq`;nQm*p_t0vL=em8u0`6iS+wza+&kAJz7cRdSiqiED{ zB+%H~5U8A)l%dcxu<>!EIKJQ)c!s)>hA%mw^2(vI8r-NF-IC?~MNnOch0}Ekm_YL9 z>bRRMZxJkG?f!+dxgbiA8MQkQU7^#z_uKk=@>o_Bd7><9ZK(LpaW zPZOI>zoFelpvkpa=89PEkJGD(vLHr#jCxOTbEACgB=6>V@mTN!SE}5Rw!M}7%C|Yz zMMQlz6&GUhWYjU6B?U!E*EPVLUY+`wXS@g+B?C^8Kn_xdPViEchUs7Uy6$Kqk0f$C z>%g~=%awD$W>ot+tPT&y-90M$lIrK{1qO7aZT|#|e;OgB_&kfZz&zq|ICPktT%X6( zl>;Mpk|Gz?F{9l6c!@W7s9kF^EQp->#D~&y>(k&<=al-*h4q zl3E;-rGc8K42&f;qblH_-1i%3q2wI4+Lli8)|MbZe9X|hWLdMXgzFr(mcK*}8Bd8p zVx~isL~Nn7wCVxPRvfoU16+Angkn^Nr84bDm8vwt|j?=or^1$*>qYN;?(s~S@z+AS?K~KXj`7dC3G{yT- z?V~hL9_-wsL@Pl}QLxfyAO?OHyJos)mzA+eNxycXC#Rd#Fl_`$pOB+}IW1pp=g7x$ z)hq4e)5P*HF@F^IWKk@xAz=VG@3?%`b3%fgV*tq7C7(>>ZX_^d{Z(B&QS>ltCX$3D z!J?yhR^GzhjFKZE`b84%?5H;S{D?R?Rot5d^x24np6*y#AWB1dyM(P}V5Zz9t6i7; zD)?Bq@|zSDm9=M9+z4IYA`@9MAwSX;kbpZuX&A4Oe3SkwbqFf4nhk4QPFpXXgljn9T>XG=<+qEh| za`JwYr&bSBPD}a|P~PqUX9MnI?X{v>cge4T?bVukT_kCuGZRqvT*~6j3a|XrogIVA ziN&DRU-ggMJ@kazRQJp6oU`&Wi#nDkuKE-E)|spna4o!e#|1fI$G<-d*(8!4)ttbY zr3{@X;~JwR6XMQ3W26sN4GU$j@cC;5?q=8zOWd4UtP~3_WUgmcp-!W5Ji5C%nkw}{ zMigEt%5UKoU9Guok>jL;TWiI2Q~sXRNm2ZeYum>#<>(r|9zL2m5yYtfuC1cvA;RyB zy)!S{buvqC!x^_NRR)omq$GLt4R|M5bhJb*^g?_xARxP!n_4_)ZlgWaao!)m{P^of zmuvV}c(3;Qf#b5J2@lE{6c9yX_$_%bGGP34-}*&m+skWHC_fJDtCs_&OT>~=?qq;u z*WuV(aavJOupn+3eX38}A>ws-=Y9;Z(W_}fGaLYJT7z>m za_IB=0NJiXM9g;E-_G2Mvt*>by1(FSy9F_%J(U>~(XU4zLS!-G2TBwWnfYP86ARa7 z+goF0^EJvTp3Rj2m@s7dYV7xu1tG}o)tFH9tPo`ALibH3@_K)h-_~Q?Z_Z`Ovj$k@ zwv#Bl0Lx;Yx9?9osEvTOI3ZhM&Nx^dWXQ#!YybQgmxJu23E1O-=Y%fi^SABfb*m@A z=RWI*+S)1^@t8iBk`0dD`Wy=M#92_xAtI;wFm!FcX?4F^W1hJ?b74OwHT|_CK9AFo zG;O(DW~6!`g@Fvue~3Y-p4UlgL;u}lgJcdv6^;fdoE||k)`qtTHwve;pU*uUjR&yC zu%g&^d8SpN7 
zv7Yz%3z_xoXQtnZBaS$2ayn+r-IH)8mCUe%aNW>ecMtGN#JQsGl#ErUUTeMrl6Cz||M2q%;qLBMN@#o6DzXX?s1dlnD9_DYAuS%{#fDmvFOZL^v`E2`80k|&ds zV}6)nL-P6BaaZcgm_rkBd7k)p|4=6mfNWE(ixAQkx61S6CBgCRgS&r#=(;A55$>Ht zF%?F~2Ko^GHsXWpLg@4L^S=?~ly{bgWP`LQpc$>fs)*-h@Y2s!=YnPvWI)q*yDr{5gy*#Xxn2c!bFl1pkh1D;s4 zL>Kf2=GVb>Q9m4MSge*s4r#)cOp^0CCmF7Z9B4K-SjLkV^30BUK(tTEI3>Ujt1rl0 z3eL`pnbS^bMw{hBE@uLF0F?SLkxJ`Tu{gBnNn0;nEsz@n5mmfv?vl;Mp2F>rjlgWz zndMvFNKaOzuCPJCe$NVAP?X)!2-ZsYsy$DeQ^HNg99|!UxB4!LVvP3=Lf)9WWwfsk9cHroMef?`NXIs ziX=m*Aa|wCwDos+?4Vt2E3=D0+V1fpLj*|y$fpzorz?c7Q40-2a{JQ6v^9C4vk6OJ zlgHSs*VujV$C5!=6y(FBHeVC5Y;Xo;R2mw!vG$bid$ca*AGoC$oC+m*1&?w&vGQ4+ z2-|#vtbGX3Flu7sf8&r_ugyHMZaq|QA~_1YF{k_z;en4ISDn=syA5sCM&5}Jq-V*? zd{3@Bm;~S*g9jL#AD^1^HPt?IM$@dn^o!W`MJu+&P+SG~l?(G;gi!EMa*HxV;#dW_ zJRZ*qMaHgYf%IIze>FHx&z4%%8q_tm!5 zH7__yKkt123o>vjPuEfK;G_@8yxV+^!{zP6Z_G}ouwQN?LC#})rDlxQ^_XbC`f&>m za-y*tP8P3l$E-u%y(^|MZPk3K*jy!vOE^!K|LA$i2;D8sgfB8?3zNpU)Z5#iVQS%# ze&_v0&0&H3yNKt&RM^<>>#d^G7Y-hD0$-~%*qaY-_qEl`7zm%pZTA7MZW`>%ezzoX zbhEoAapcE)b|t8-y0^+Yt!b+byw7!Y@M$brg6Rtv0VUz;;P5se?$htUND_#H=l;Yc z9Gc8{(VVdo{30b}iWJXboC*ZGY}{Xmj2dET`E-BLLo~f;WT{uSHtRlcIJWuiFB;k5s=cy;XYsahuZzN7 zB~5ng3fV?W=3K*9aC-lE{&Ad^YFFsy882?LG#NGLf_@~ll zBG#T7AD_FBr#0tlU)>u{R;2QqlTLC+&Xd;PL`idZB<`=FS^pF5OFXyEdi)6e)?T;nzC2E+!ez`?!=q{S@c)y^*B9*j`&l zE;|-{6xg>{87^Mh&x%QwT~DmWgP{qTG;OGP#5lW}6ntL!Nv5I^@_IbHQQao#6E#e5 zGtJNePqDBo^`hC9#So-jP?-`(y$=(G)+G_tzBB6e7DzUUE-Nf<5K@rxkH0{ii7s4;1U%ASpwzqZe>$* z{TxkUdM`@KZIA1|o#ez-FU&#!Bw=h;$PGf#k6A3|QPcXz?S6DI%Cc>VqnK;6XK-?jxO`g^biWjnDyGPsgOIc#4z1=XlY1iXCHbXh&8 z5bwKK4o@iVP;2>C{DZ~>E2G;0eR3Mj<2ka(b9e?FJjG2tSrJEbZaG22#Eb~(5r0O` zQx{eux@6wk7&sI9hzLVv_=Y6B`7qE_7d38daXQ|mpi2NGiHXMYonHO!W7*9=p^2fyY>pS)4q^Z+}@k#A+noI?cX`&WFc%nS!?{;AJK3A$s)Qg(dL> z1oq?FJ`TEF^k9ba8Z*)xPQ(BpMJbLmyoB+SsdRKKx8~=EpYqG~BWHoRu|1 zxak8JGUvB3{fDd-etSW5nbOguzif>|#x~r11~Hu3ykxn7y$@~1#pDC*V)i=jpWRBB z58ZFL{&vrIf9Y+-%R3lCM}po3R_tX>j&;u?Q_N6b#*0h*;v@L|5K$0rNX|-gg`4U4 zvB{Y4lKf(KKkE92olQyeM{zS|mzyf?I+ltOu^7AEHz!_kzN6k8#Aj^r{z?+xv6qb!IkoN?GDsL_oZ~N* 
zf8wrSbW*^U;ikcZlhVQC)uCrdBANj3vZf_LKArj+TA?Tb3$0)(?c04am<#eSHTAkE zY&i>B*I26VOn9t#q^+HciD+KtD#gy|zfD_zkBkiK{g}uIFn?UMSQql|~L5(*@i$Ua}1q`&T(JKs&6I65et z8C}h*6S)w>H^=g*2;|eW_)}ypgfG{soKj?whmz}1j!!A3EkqJS#^{#9P8U{t)T#cc zD6^&f`Na5DnxvG-#>hz}<7KU)h6%c}a#^9gen1zrtT0yga12K+A+N7q)%T3ksS;A; z%azM)m7{c^(OIMF*)Nf z0nu%>T}n}_k4aoFq6oClVFXEP;j=)<>t^hLG{(Tas=iKJYE52kh@wLyp+x47D?yo* zNrkbAommKW5vZFui6rB%?hvMZmnDMBiW{pUbrA#~c2k2Q33gxtY6MlSs&ApAqE+mg zM+!SFksShMg^%@Jv+pc*iz{aJPcbtMc5WEI0YLvnrBsLc9sV06% zx-*zq*UgJ|CCRb;oe%Bv5b|rh=;Yf2f>-QE`gEGuJhbZGDeTA^Vz^(bVC%@<$sB$Y(xt>QrOk!CvsT5>C$OPb}2hO*MUUtV`Eh8=kVDz;U|RC|8`0=*Pu6Z{KqUT#SWI!uFg(-pTj|E73ZKQV z6KXciOC4Hbu3J&EFcR;HkQ{>2ar~Uu@bOUl=QSu((w)z9w{xyZxP`w#JJ;mj)%G&8 zK>PTmpnU{XMoGo5msnH?QS9vfQX8x1NOIr~XnTN0kWRV_vn3;kE^d!5=;(Z|rXBy! z3Q0*#Qj&z*UEJix*6S~CmkbDdX6Op{LtYkzQMMZ;8rmu+Z`5NXkNc~gFC#%p!9iFn5=Dv z|L#ybDl@vv(jm}mnqu$hXYo^|jwA_k;oG~l{xe*O>}v0;ZC0|f`AR+atDif963kaS zD&DnMI~M1Y4ZK`lmVZ(!#4BYlVaTFLaPSrZ<2GvggVuyzbzi#Be6@a-8PRz^(eQ~@ zrR6lH=fmo^EU!A*9D0|S3Hjg4HKuN)gmwbJDBXmeFwY~}3=%=!H?0N)R{(0CEMca` z*~baYi8?yB(#M=aEDQlF?`V(W9jxaUFG)Ah7Cu<~;+ZyZQ{-A7yoBQXde8FgXu@>s zqYw`5RJ(AD`cXTfyRwD1KjANZTtm(iMS(A_oM~&KLT4u;boK(7U2=n0f&@YyD#(O2 zac*d=bO$@he7dced8p!<@Kn=sd6}w~no;?x;Gz`C<)>Q@46G%}w!`GwodYst<{ste z(pb`EVY-Y$VQdc+B^`=|L$)^(e3@CB6Ck2uOH6;ruG*r5QuQZOufv!#S`Kxza&VY4 zjBX4}k8p=A9e*yUGTLy8N9Cr0^uPzm^g8a@p_AW^2&1 z{AFKS25m9f{c!|>YVfNGk-+yIqQECN@K4Xl9@F3fK5yYn+6UYQ@p=d=%Le2R;ee^_ zR=p-~QSBE#&*aG&65i<5;@GG+zw8(H9^{cR%41LMEmRZrc#J8)b+((gI#M)nwz znnHUk&kX%0(Gr>Ma5tA(_IW6@EV>vmSt`G!{diH z;2|gQxTpHZk|8Ch6dM%e{TXe&T*Ad?SN&;GD`3=(nYhM*jeeOZKV8Vm6$kWly=g+z zimV91?>WmY<>4!?nPB>urbB}k2uI;_%_WcT@qD$zJI)*DQJIc=%9=wPvy(puaEon4 zZVNW!&i6aa>@Kxyu=uSyLJ(@~u@vXBoAE7_vZrS*$8>28*P~x?=|Xv55VDB4V5UnP zOw*`$19vV1zJM2BIz1Wq`D%jA@Wq|5ic}H8A2;1xsNgy^+4BWnKeL6@z)2JamJ&99Mz}GRV;l56~IA^%6Pd7a_PLjA3HVDFwL9~;sV?COgz8XD(8zZ%+$2@*}vA8>6!dBdG9k=*0@w&yZL)hBN< z%ikS#a(<;cbr!nX-UCiokLk)|jx0OPWjQCAUInB+qm=f!E0-1zue6NPTh++kw^iyV zQsn43E_9k 
z$4ZbK)f&kCh|{b79c7mX$kHutf6a+zL^${5QraqI4i}|Jfa3Nq;~J+|Juhd3#_6uc z%DE_YzI{#R`?gz9k4JnJsWPLT#HlhOEs1=O*O6o3Nb~7N7Qgz4+c{2w6gMX7MS{o7 zP0r|Ll9WZ7<9O!d31-PJyz=#VXZ@Y^D2`2=xi6g}$j#f3|G5v#c6v3e=T^`);ObPF z0ix;Vhi0s^k+%_ONu$Wkhnlh)l9Y(z_sK7+bqhRYSXP$7Jo4A+F}+=4R=!7SvvG;v z-AeK@mNWRAf7i0&k_hbg8jA!nNeqe==6q10gxk)>{j#2olUqpj7&BhBa8S|w(_NcS zc8`yCp2CL|0R2--^G_+zgVN_8N}PX5f&QTY0w`FSIne9TKPZ1x=?#zyA(#_V=>wz( zC_&7uOy0m;>GgP!eZsr~^!$PX2LDAY^dR>6m$(P1Ndl`u9O?A{NL3Wl1H{$Q02?5K z6bjLciUMRn^uPxrsz|{PFFo)>GOif6W3B)h_f7T>D%<12nr1V4=b+swdkl`l5rgyQ=!s174ELBn8;x@gF;bsS7 zJ(F+!gu*Bwr4E;RAA@)TVq5ghm5eE83=4YnzI^8~h1A3cI5AbCoLLFQLZjQkVCn+h zg||&+)9{?df!D8klux$-=3V$|^mX~sDmbLx4Xr$F$9Pe6g96#WiE%aFs>S=Nw=x_|6JupnvkKZ{opvz6j zl>d=}MPDsAGL9&GX?R#^BfN8f2Tw5a`7-c#3WW~!l0Ay~@U0H9n;pe>#+Sc^goSwQ zxa^wQCVUKTQ)4fj6Ac4>_9>+3E<4V(yi;&d$`FvMsp}9sD-slgok#B)?m`BQ0VrYL zo1O5yKG4>C8mj(S=Ct4_o}JZKc$8`L0@4GGea)vF#kBwk`ml9HGc%eOH1`56zLTctMqz^KBA|*34V2%`S}vR zhX0~h^? zKRuG7CnNT)$|8^VFEM57x{!Rx&8W$AJ0Bg<-yDFeQ}6A@akD$ zP|q_G)euK|$sX$OLE4|FR?*NU3Eol1KVg=9^A?rl)lQab0d-OEcDDJZ-X@zPWjzY6 zWKGt4OTudscg*KJSco>=G6Gxl3S$+aCsBc9ZM}yeAv$jVn$`q3n&-3qK$VW9MFMSX z3BP;--)96OIpI76r0p_qVqBT92pEb#5q?3ndNuut_6uEI*xQ)=9z9|VX=&g)#rUUv zbRbt*#fym?=jz{7l==2Ot4vFpAjg@*yIe34sD`1a*9x-hjYtlWu=bLY*uU% zoC;l&U<71?BY;ulK8n;VBo(L?6ctqCQNlqKT@faTa_z4ozo>V@a69eGOl^VH*dEV3 z2tCkWMtKE1Uv`973mNH)Du3kop|Pk7XN7CQiE1et3Fn_3MBsOx9QGeUE@eFNy`M-# zWiqf4MPPjkeEWvogeyCCL`F!VR>Ac(6XDrM8>Y=J#*SnFL<= zF)`RN?6IaZ&M`L8$MwpX;oFPqD9*ouvSczWu(rI7Oza^M$+< zUK8UJHH9vP3*)b>lC1)5R;_nOg^G_H@$91dsrS=jg+n%Ge+BXn@;&F%LJE?oN781+8!H+U$vsm~{kE5#UeqLc6L|gwPorB&HmgE%a)0k0DEe6wKK{++h%W7hD<{ z5osDBK<-2#2GZok;~5f!@;BSO5%?geV?JhVKS&Jj11a&&*||^Q2vi6v0E>ZCppZeD z0rd5T!4R$yQf1ON;TYjmiiG2FDIK>2*U9Ax zEBdf3A7mmy;*Ug@N* z#ku_X3)qjD;f5a`BO%K~YifHA3tGd1Q#C(b`iAFWK?^T@Dt$iP3*1vZ(tR}RyY5@~ zs=GD8&+GM)-+JR-|3vgs#wNyo#sjGoDY+c?T#{U(oYY*SZrASk?zi9)Q>`w; zuoCsG2gr@h5TCEeeQjt3``g%EcT!eu%W_K$CAJ*Chx2?pCOvEoS_1I{#N*v+`4=nauMSVk}K!Xh|{2AJpFxAh2#gRlFylkcxqdu!=AJX-Rwf0?gQi 
z{1C~h@b3+sd{e^{zCWbC8-hNsB&x-oun3A=yv?xB;DH&}O&u%svmSe$v>5v7wi%4- zW$Bew8$0H0^j&L=6y+7WYweVB>xwmZ!3O=R0;{ZSeAp_vIRTz^+~x)FbIQ-W zcl5$Z!lXL|1!`>zWY*Mqbmgm5{Q5 zY};YY0nMdfJ2SI2G;CVj!vMM&+V4#cm|)r7sHB%kyN&NmwBFZ*mnnQ#(529yGJw3R zs@nE)l0SPJsiovmV!nr%o@{VyDBV5!J+7Irp{08PEj2zEbZ@yH!pIE{^Gfy-zM&P6 z5j^)9ysod4%Pp+j>hNCj-tzm1<~0~KILP&x3sM3v{^E3O+}_r|;Oa9z*JWD_%dbhI zMGavJq4i0<+&k-d9pVvP7FFqGacN+OS(W>RXj^|?_-Dcx^Ip^rtvID?WleWp{)$Kc z!3Ev8n5KjNn(Gu+duR?(rt*4bns|cK_SV+wIJil{{U9w=%%KF(2s^YLnL63<98TZ5 z*p+abKf5h|Iz$#_XlAfmzX~sOX!Xuo!Mvdp!Jxr-8A2UW4|r*?Pmv+$xg2(rFs{{9 zn$bWC-y7?>jak}xKM9+ETg%YY+iScUNw( zO)nJX_8x0l7c*Zro(JGU*H)k~(=lBiqg5#R`xxymY;l`xTdx-zmXy zuYWVJI_1*hdJEzqdCC7r|MYgeG}Wk}<@f#AKIFjh_vxJ$MX9-AQsa9chefU(yZeS@ z7b4!egYp9oiDd81EArE${rm`(OmDP{z>}N*k#!6II}OGDrvP6XiqyJWL6HjlhqU#7 zUujuEUqcx(gR=? zFetCNP?Wrv5%-Qd5RK=_;X|0@yy#}S0TMFBv~$<*=hiG&9bS#2nr*deEcEEN9k z%Re}8A-2vCM=P)iHsD|TLjSO%|MkSe-(uqb!vQksf7tl{aKQBjW^4BU@ZxF@F~{bi z2ZFG9{{5jB;OFP%r#GknHwHo$HV+?q=)WTht CfT6ko literal 0 HcmV?d00001 diff --git a/doc/performance/Intel_IrisPro/SGEMV.pdf b/doc/performance/Intel_IrisPro/SGEMV.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e166099979fc16b66dc4da787254a7d228817b23 GIT binary patch literal 13626 zcmb8W1yo#3vo?wZ2oNAZuwVnh0}Sp7?k>R{1_?624DPNW1b24{9^BnMK(OE*-0dIo zo%cKEeE+?7tvhRN>1S6}*X~)XtE=}@)JkF!OsvctXw<%IzU#hYzEipmXsiGh0ND65 znt%WrP}Bir1hoX)iWxyc06H-~HWpSc7Is!PR!(+q4qkdRpgh?0|Mi)gCDa$2 zFtW3-G=TsV9ieuPP?(yM1K8Bj1my7il%1K4nH`4x?F;-2(W8C*hz7DXefEOw-`)U# zYOZ!508rEjYGe&I2LP3f%s~(UJ4`?Y@ZSpD|ChqE+_SU;$QJrv;y_WbEfm&-0C@hY z164o}u%m+s%pS*o*n1ZEXE@|Qw&qX^052yeI{+wQX$^(F0Z_skHaKD+6R;`h*>4CG zHfA6H)r zw2EE;`x5fIPc47+qsptaW2Q2NEv(GphlIycJ`ygkoWfKP6`-}~%CTh|?y# z+;0pLoLSt?O0J&P>sEBV&JKw+$rB!)Rvx$7CJM@QA6HUVo=zNeyBZ)xmi zp6+Rat-dD2ezh7-C(kXroBLVG)7v8Pw!5>yg+tA!DTx+!H~nI++97Afg!0I8 z;L5|rh@eLwo_DFKwMzg;q!orPn}8Hz%3)wYKdHUh}keVC@qqs7ED(xM~dF7XHlP zy{<@e6AS)e6@hK-iXIOMk*pfp#oL(SBk$%myH~SvqDMQ44!S&-=CpffJ-83erJNO(|Zp*c)*Ltpm2*TFNWo=2MmIu 
zCDHj6fu*OdZ`v&d4^66JS6}O-%JdaPY2qG*y0VrOMQJWPyt}At+!s~^%DLOD$WKFA zNZ8;%s?R{n?yuXP_MW`t<=t-f4zC2C4hx3v$e+%v-p*3Bw%xCmID22;?hER--Yunx z?(ua}laJWl)~(#5#ocfAJv}7U3F>)yc;CRTPxqI!#Sm~A)u*8K`x?q%mhWGFQ1I&O z#P>a1);%4MJ)ND3VR`Dd-qzimeSI>YtbRNkBVcsCtZ?Ss{E-%37UP-inC3+EZlyBq zeV{UHoz-_ZV{KFmHA9O*r?P=ty{?OPr|(mj!QB#hNIR?%7P}UBhNn=XhhO|)2Nk8v z@3SJ*60KXkEOIG?Z-3-ulIV<+l&T@wtQOM4bzz5VVZv9zIks(yWk3zzYHdW~@%0Ip z7Yn7iY9#K}(gdv{>yspP*5I)L?M!0};+USHNGg^#^DJENq z$f7FETIJK0oHL~v@#K7BTQvn53;PFeL2Yl68M^Ecat-g|8W9{tr`BTpE<-q?*gt3x z7i#+?8z49;b-@SWRok(~GvV7s6Hu^ce9!ugPN5CPJaT{0%e)cy{112! zS$yO7BQWXSjNMBH2&sk1+eYfO`NGKFCbzhPp};Fk+j6#9Ung6Ezzsb&QU4MFJ=2%34UjKoLv5bUrqbTskwiT<2c{(>#d>9ETPvA5L;*yEYn38TEFos<~lqIOXG_ zjDGo&f%u>#pH4&IMv~k=b&xRZn3=|}I2?2tGL2Q!2(z9KQ8Upx{Itban8u%f>RTl1 zI4b8b zi?+`zO#ol0Bf2M&oS~K;OK*c@+rzX49T1r-Iv4pWtney2uXE!n=Zd1^S(Bv$lzn}J zGV$0*>yo`>jSjk+Uk3Z(;hVo*FMF?f!W%>2!plB6p0~ZzyvtURo!_76AlkvukeO+2#I0MbeZ%@YcYjF+jadJNvOGkk2^9{KnO2T2(vla}sa1%0`p*}(#qqp4@7$)-${&{j}d55UPfs*gva+z}wYpBT;i z;3G&|(OFUQieqiK9Wd0LgOF|KGKU8dF%gUIUH8M2BkaW=Rnzo+nyr&;}_e>zp{q4~&?Z#F0xazgF# zXEE@vTm9p9wkM~ID|!oj<66S`oPRbEy398vKAtn9ee`epGq5O}6a?BK)(rJx{xG98 z3n9Gj$DAjTNc{Eg&~OH?S>AwqoZ}dEHkPO~j8PXAeGgf^G(+}l%OR7dR!VVOrcqTujSTS-e};opDBgB^h7xrYCO?h$%XWvTc(IYzkBvfke~~cQ8qt@tz2_I zosZm4-DMnI@>?N6_f-4NuFg`tZ|}g)Pa+JK5Ehf)6LgU^hLfq6Qhg)%8$&O|4wTiq zMK_o>j(w4A3`d2rt2LFeYmX~Jv|V(?7=N^fXqEQE?kzU%O^4RlktH46Y@pp?ZZ5ft zda=C+yaEp1ragmh9#jIiEjm8kU2H}9s&KiJ3a2C#4Ye$;XL()Xm97u+%Z$f!DYGl4 zb?w5CuQ7?uTI0F%>r^yg{J9GodmLB{6}a2F;C8KUuLeIi!zUDAm!<4Xj5?ms=g4|T zM$7v5DU;c$8NBn*Ph*(w15b}MEOwi6|AB?s}bH+eq3N?2ThA~j7c*bO|$YpC3^=sI{$8Rd!9n}+j}96sj1pYvKqmLs#^I}iXP z(p+)ti4UzH%l!d2hQ)*3Pdp-=eoZzvYut#rjBUq&Y{nioyzo@iQBrsQ==t&Wi2BZ&W8$)+1^P31uFV^Acy zX;;Ru(ZcI_?-a)S=91&Ih>R?zdxBf|q@RqEqzB!E1BHM+4dFol6#|As$QA$%8L{)# z-ig_pqcz;a>j1{L{N87Ua1fYyu%mq{d=C5T`pk^(>~ZDr$YCVg;9kO!)}&~*Y$m@o zyaEH~5U)nDT9GJe@Et88kqh$1^r=|q40z0gS&)-x5~{#2==Bf(%~{k4GaM)0{9 
z2VFaa@t8S^ea+5qjfhUgI7Uf6tHtRXt^6ltkK<#}sj4Q{<8So7x&he3^|bzMgGFPzgOJd{3}DMTlygbzIAwz`IKQtC5KuonW(54kD9=6G-So zbOUA$d%j3?41YedhYF=)Ldx|*e9NG2pCN%wsC4R|L=8zcm@C}_tp_RmORQj)Rh4g2 zS{-k(VWA5PHInIWwS!GgI2HZt=ZV&ko6=KV2Of^)f=e0*>GbH!D8uL~T>R&_#jJrU z!*sVNT&yxrRQoGONwDVNu+r4?3Di~x^NZB3;*HvMkc@d-$>7vx*e4fQ1cLbsk=993 z5ezV~QjIB-AMvu9k~t(nJeut;ityT@QB1hW<$%R+^g;n-!>_?=7045mSaAH9$Q6+n zSUl>}w zbT8d2faHUBVphSQ#AI>B14TX=nHtntbujHmHpH_jTqTAhyuIAPkNAE4QbOIF_y^u< z(ozOjdjR-mx;lB__Ol>p3`;)f2!JU3&Do1u&2O+XdjO7KZ&eQlI z;IQOFpepwyNyiz8lxT*<8Q)j3bCIPu+8KbiYpA}6Uvhep4YM=8e|5=Y_nBxn+pfKp zA*p!Pj%lx-vJ7X7!t#sH-bbv3ehyT*J_9EQi$xQNROnG}v4qds3`=_}5DE7S_QR(e z;0Xs`5{646tG=^=%fD?n4;G_Wgp)X2L_s9Hv;LWF3rBOi>xL9a+Wyiy8jjH2k!6)9 z#PAusJ*X3Z6U9;?X$WW1hLxYoc#8n@GT*#>1wj~|DkXZ0UOTfK&P;Xs+-j-?! z^4Rn6-rQ>C?w&weMI}*b8vO3!?Xm%VY?K%VcjqEH8+{qh_3> zs$$^*jfs7Vyc%0?Q{5lMK56qHX;nlm;hI)D;KL3etOfle96#Ah-)~%n^lA8U6EIY) z<=)wueJh4_S{09RoNk9E+(_ zKB|XN``bRb8O5nthXz)RPGPf-CSfn5z$+7cpV}pnjS+-`*%r(=XLR)G#2$0>>1h>x z;=;;oCheNdKR(h-0$3K8hT%7wnZ1Ly!s)?n31%j49NrQ`Yyp;0+Y?7i2x93Tsye)Z z-pEef1#lB(fde|PeYcM3!zDm<76HrdKK&abl<}(17ZIm&TUbs<(WFtQ^f#oeV!{Fr zwjw-bOkFHszQs);YA_`*m0?k26PC((V!cjkrX~CGR~6xVZdng3;_S>{!F?OBW&PJR zrN>g>`q~8`q^6SqH(_(B`!KRKn}8h`fNyc17cm7N*~|Bry0-7cBpU>a z@=Opt`K|wuJCeF@rx=|HaVhovxG|d(cuh)dag$=CdPMH{?(*5NV!^}Q5OJB}B0)`j zt6D(eWXEl_WXM|*nf0WX6B%C7uP0U>50|hKoZGx^Ihj1&b$253?{n*wY({i}Rp`0v zTBQdw#f`Hh&7c4$iI+)5)y; zuc55pdaLRkw3RFz5TTdUnD+3s$=xUMzcmp!WY0sB)z$&>iiy1awfjF)4tzJ&hJG(q zY-L6Q#)pBvb@FJTJfwqHdou?<4G8EhcuV#);CocbNj@Ai@h($p@ck|uv*RJu+Btq5o^QU1X z81-?vJ(0cRBy+e?n{Ri9?6ZFHfM43eN1_{5Ov92yEj{I-kM6pKk^VsNIwsB{4W3bt z?Wr!?qK1dAhI@>%BtMTTl0L&_U0r+Kk5B2$`+B>ewxu-NM5(1kQ#DCzHqB|9AEu7g zOt3f=IcyN3uzEL!kx-p8Fts@hFZ7%k(aHZ|mXwaRH9h*lZRK*@pS?kVW9_+b*j?#Ru$FOa2 z-IQp3WPEwo1WS(uiD~ztd%^MY>9WiodWOvIF%6iX&Gi+KxAP%osbd>Uy2a>QTfD^HrTqdQfx;_ek^7>gD~b=fOLZ$2 zML}CRWawbKhqjE}hf_LzAl|O4$x&L=jyvN^wR2OK1Qlal6X~>LAWmo}T{2NW0 zxA_n?S2J&8ct4Gk5$2+|haoS?XEYhmu?T%mwkfv7-}qKb+Maw{knqu^a}XhSuK1#(d@2f;$^D=NiRDDfB6yJ(> 
zDk@1^t9>mgfRO}5OQC8%mCza!RkU*(g6^xY-y;wFT7CWq%?J!&{MuJ-U2n*<)+Nm) zf*drx84R(`wRmy)G1)jr-0GgCE8p7R?nEw>t`6!{OY?Ba{*Y+NnOEoj%12`#x<;n+ zD=y95j11CZ_A^=EwNJNPt2$rfC+n=0MCooEWcVWa@Ut6;1opTpS^uUtez{6*D#->0 z`+j!Yd<%mUlJ%2%b<2}-@-@<7TIz+naUj^^6HB*ELY-Sx4gN08HhyNx7bdK2=?23? z9~FRGAcGEO{JZ^up0yMDMPr>^4W)3KJcAC#E${ z_A0DTTeEgyFd;cAY&1u&HP-8n}FKjN>V{gya!eCtjQH_;2{4@9P z!2A1xjFrdZKCFQruLdo_FWQWv@+s-rrc5tX>-{EXF8pVr6#7XV;mS2z$FcKMa1n!> zvLbP;cW~bwvSj09n)sB1SO#S1c<$k>f=@lgoos(A;;OSMFk)6}CWlsq30GE!1vW=( zwSQ7Z^K|H!H#k5KhC7=w)k1J8Bl1?zE-Pd}PUvkKjTr zGR}Z+FH3x!`@Cz*zGm#WuE-OD`0=GzBu30UQ=np0ntf=rCBZ#1SBUlw=ZTY+W)IKJ zyYWb+t8PQQE@p0|6ExpXua2UznhZ#LZjgdfh6fZ-6dlh+n%*B%h%sG6RRq7T4l(6m z;*<^%Wtzf2{%!gq=gVsq{CA;B(uy0Oi=uIBB8q9vX-tXSRt>1-O_YaLo$2FJk^Hd& zY$STKVY71PeEvd7^S5dr|2&X6j;d>8sc#&p4M{sT6$$ZrF0orJo%sqkW-glBAwFs< zR9bFx*Wm+eD3j|ub0V46JY7c7hB~#4qCqgtPub-{tz!_0lL`YqY_lx+XXQa12Ueb0 z$HwM4TAr~o%<}G=Auri-rO)^z`#iNWMVe~*(~Ma%* zE7zYs$IrZ%>8Wx94g5kM^4iDCGefNmLs5aL!9v;L5=d}SCx4oeLNX~srsw%h`b%{$ zL9!aSpYi1ow%Aqwdf~?IJrCx-An(f4s0g8kT5WQOEtbXNH77pl`p~~nu;4UZ`%rVc z!^`?kEH9MA!=od)aI>MfV+lgYl{Fs=FI`D*97vAys;{soGc#{|Ps+pM6=!u$WM!zC zJsj;uWd$k^LN*aEvcqeNZj0fx2>DMRwdT~Wn_URkL87K`8Fpw?6-8O11b4Lg%0!Dj z!A7{+=;!)_p+vl2I9{?AMukkg%TdUADf?>hI$&Mw+cLEgc}g`j5^d(Uo^k&(=fCwBuqCmnzwW+{0ia_D?oZ7o%wF4 z*Qp4$e(ySizJg3|gKS^Dl1Xm}Qr>%^7UdBh4_=r4Bg=Y=s3(rG;hOd0AK=Q0xh!k9Dtx=ZYS z^JP$c)&8UQAo$KZRv?0AYLt{_jq(~nakZDkY%RRo=$5e;T^*WGRJ)hMWOFT-YXtIm zYo54L^CUi4LC}fcxODC3i*B+SM%!4y#A9cb$*6Y{#At0wajEv8nOWw>_`@{^P1Ut- zCpb4UIj}!PThaJ)IP&}1$t*hC-Fs{}N zzw9x$`;RJb`tE7OGaTnX=&14Z|9B}~zX{M$#*`n|p@=4Kp+0KN(POhuSjKS%&-KOY z5A?+^vsp|fxeZ^$le*J9M}XF3eak*wpjMe|d9KO#gov3ujX~B~S+2dw_p5W0R{2GJ zQIot%##*`g8db2lJ_t6~e}>KV50-A!uu!nv#Mv?#(V$<=C0|ZuLbmaXf04fT4^}K) z*BlwWYoehwR-C{v@JF8vSw6vnF=n|T0l&B`8O`7G7XI^nKK?>B49Dm1f#HA8TX@g+ z`FIO!FdUCxT#k(9?|JLZ^L_rCv&pu0y33wynnBu2q-+`)+Dq3inoRmEoAV`m+$`KX zi|dUZ{6{f|%CKsXsy>Td6$fTgVcmz-2+_M#D#5Z&>&i9vDf9yA=nMZ-v6rl}hGU(b z7Bpt?Up^eI#U>&VXBrM0qdu9UB)qO>q5m-Yu$S7AjItf!ViS;vRIuNhu=}Fu#wXa0 
zncu~(BLea5JYh?FRvO!#PZF*m`Wce|QgVXt!OPTgY)QWrjP5t@9MXbS@?TuC*pDaD zt5O>9nv5lCOEF-i*DxB#_iis54;$mq6sOK%DSbQ$D8AEwU9C3%ezu1Ze;)5sEVo=@ zj~%7pwRE5sV}u1S_b{VG7mjHi!bs}8|2EG@fZ5Hb4nb00F-5SRFKf?!fpG(&y z1}B?z3pFtDu?eV$LbTHeQ|hs+N;5-nDoQi^qd8wz9n`@ZRqaS43U8?mm0hu`3@xe_ zMh>Qe0~n9_PkLmY!N$c6KpSjVC^a@pJWVNF*_t;`j*B*aKBXDEs-1W!At}yB92Np_ zCsRNx_Zlx^qmUF(C>*g7@Z$(5(C_tB3VuCH0>rTW*~5wi^x{ov>NH&6nXay67LL;8 z!)EE1`%hKC$MxPYC?wqUclt8zuqmIHC@tg{EtV@ig=}UVK+XIOEPPo?qBkT{x4|g04b2PH);z6q?9* zF65HX6FgtREa`GrlC9o@m)*73{)dpcB#Ig-f4u2MDo%D@x(`;2bCz%e<4_c?l2)Rc zYfsk#l5klNiV*SpQGa>b!KNB&uO6nWUElo4!7ZtZVR`bu zxA>a#GwL3BW)iY3jz}Z)g8PGhC&?QM4tuQkwy?dEkM3lN6xd&`tCT}m^Cl&z^KZqe zk*`j*KfMX1q`)hXDl|tN?e^kvDDqMc6<;*te4$R|x7}i`X3UDQ>Gozr1vY77JKqKA zV#n%E6m{E8sq@exHK+;d8F+wHs6?pzR5_=^5r`FLl>Ukyt6_*{W5*Gz2{0Mai6>Ab zz~*NEGwQcEMY5Br>{irV8!7~wlM6Df6tjB-7jvav${X4b#>2W* zktP8W_6Xt#vpEzb^QCgzEG!}c`ck(+y4eP5#@86Ml1jqaGj)5U+MXr-wYN^Yb**|* zxh8raq^wGT8#+*z^T23K;2(;vg99scXn6x?-ZmL21dcmWPBGC*gnZo zGsd@7mZ(VRWYQjl9ea6p;nAL2_2LaJjEly`AMHb0NNklw(d+0$69aW>)Y0n_FJrox z1|Ki>?84)or0RQIDn63gPH_AL656sdm$^`4E_A*jCk%kpi(j;)X?BS%o_iFTS}ps*gQzs)a_^ZDE^CzO`26qMJ*d2yA92^9XE%_ZMM45%C8%wXsGXH^1Xzjoe2E zyH@x+OX$0@?*$f0=(>KmjH1LEZH?7+H{y*oOb{E)onF=XYw5|AujLmULYL1eZQVwA zI9sYb8pyaQxmrSWwB@5#kZYRXVsp2c86@jBH0Z8iW@WG}mx*c)ra6Tli33|gz6)|D zvVUG8$5IwyQc4Kvx>)J*77&u5ZX;`si~Jg^;E`eUK8>87DMDRm_fg97e$G^|?F!Kn zXt>)#@9C}LizAaJBiK$GXuxl=&E@U$Z8r)J3O>8b(7j1juvu1od4qH5$&jRK<}tr` zCfIuSSWu@3UU|5G)Hw2f+}$5q>EM4_#{#byJlZLR>l-LASe1<7cfZkt1@NK5UQ+ z?M)t51%O)mjD$Q&$|C?NR8gH6J$i5o^uW6bDzADi^kUI%VoO*XF5^#GgYM6v_#v+* zk9j*3KG`!CS5cW%{dcgv9pFc1_c-oT`N~CL% z8m^7HalHKp`@JMou|{-VrAm5{c`(uN3L&PuwNfRSWH=42B~5SGf_X?0qsj9m+>2B= zx*svjbG~K3QXTd|nrI95(Sb1lB@XfAIMJT8Gr8<}AszP!GPnmyP_tRa&*8=i^`@^Zf+8%M9@-0Fi?;_ z9zS3|=e`jc$W;FWP{qV#@VKTe?&n;DeNZHla4SVBxpc$jvGE5x?AuK2l78>S; z%p$dHIBcd1503vh=x?mRl*jHmP?VQ93_J84KGUm)^47*CcBg97Y~jKW4k<8kaumc* zrX;G$T5%S{TMEt>v_Ya`RN&CQ=OJgZ&q^>c5y_&&02P5wrebhW(2W z29&ilg#dH_&qOd4fF6u626luIzgPi4d6224kqFoYpu+-t#>>tH;N<4g`ybq~XYSWO 
zTrU_!%|sOh1?T`_Bso|QAQz|}8c+hp8v_Uk043M}tj|VNVEiw30P8bn?k@=qDDDE4 zRE0j%{3wgeD5aJkn|ci7lu^gyG6{smd&U+ZKr{py zT>~9&EYhxj;l*i<_-05%u>JD7fvuIs#2+dhB-9Jus3h`G;DV!zUS!^asG3?cnMBjE z`7y1#!u^&SFgTtBY7UR#4q^3-4r?>xP!)s_(deoMeTl&hUmY8h-wy8?X2InC@@|cF zE{RBsWYq!AZ0ug^jjIh&6(#>14=)dk4UO4}EPRF)J#gpf zHUraJ)WBm*rFH3O)5zs4h9?@zDyq+IYvz_O*4;n4W_R;yD+GCj*bBvfa-4g ze*DN2y;ltsX~t^dR|O8-W7=CoY7zCq%r1 zh}-_+M>}>s9M*3{1Yx?67xsp%zy01s!HWS1-uTpJ62|(K8PXOaJ7!{Y_;cA2LVZns zbMT-r`wkhtScF$$!A$p|KSmoCdL0nh{~Avv7zz;WC#ee5j3VAZL=weTB8h!TEh-{~ zfG4z{sZvN%9JH5ZwxhE{2YuTJhap;*sceCBi|2;&js-QpL%Rao8mZb)k>_P(071vV zF&hs#vtM0%oE@Ua>puaCT?xOjHPHlp3$T4&WAkN)vtZ+QN{D=Op+d!`D2c+!MX(h5 z6-Aay-Vi4Bt)O4$4YIg6tCCzS`XD)8;97QkmUP>ivZBW&cx3BkU4*G{EAQzrh_u*D!b%|XmF^VtLc^|mTP9Yh}lb>6yfL(NQIVlD{JSeE$B!Y0v4Y>AE84qLnEj%S6intBubG6Fem+P)wW1A(B6- z(Y(Rii$4Q6M>yVbXjuH2A{kHpIr;N+YH+f6k}u~%E#>=RR$6h3=v}~NF6O5`dCAOb z(e2z(lOVLNm~W$nrsu5rP)^4ON9x9lM*l{3w_Z18w?MZ;x2tp1TgC^f2h!V?TUBH} zWC~;?aH}DY&(s4 z+M3(;)h(DfPo;k*H*V`_oqhE8&++fi8ZnxjXlI?M{CR>L-iu~pW0vMD!tcsk=ES#k z{&T)V>%e2~hJYYnHd!`&D95$;?E38O_Lqje1_#}Jy*%4jbz*fbwkpng&XMl^cZgV? 
zSm}~6eE|hU=Ed8>W9Irl^(o6S%V(|4FJ&(2!QBgEP2KL}!E3mis)vvjjWO=|x*6xe zv1M@J3cYuYchnQt6EVCt{G!j6Pmxexdz`PQaBe1&@Qg5)--w@h#|H{KDZ~EDsHQWF zZS-yDK)-RH@x!dE&WNt~Ku_{WR4nvzRBu8fT;YNobcU*q&pr&3GeOoYO=e?yOWvr>^q&nJEIYGn)Rt^Y9p!A(a2qkyB227%j( zR1Gw7v>#-gk7u**7%xhIZmtV-Zr2jzE@&Pg7TcW+16m~x9 z9a}B?8C!7maCKO9?oevNp)MCS8V)B&bUwVQsfS~JY}%(=tV*9Pc|Be==8T4$?@B7& zA)N(m*f4)8KSX=#dDf=ytKFeDp_8do@xu_Bw>^0KX}mbE#7$$roLT!rYcF`jw>F^G z(#o5zhM5uQ(ZFn`r}#~MB00PA!fRhA94}0~hnuV3x^N-n7u_6Bz2%?$VtH~|7it38 zGTHYsE8s2L*6X+tjW`3(U(q$7is3BlG4)~f)t~zdi*=-Q8q8xr@&&T076^)o?A?z|pIskU~{(^2L^>WhZFdzsl`{;%04*QWA=lesDNf=?RS zSGMJbM7?BlYc-* z(7E#m4We>0{e)&^Z^&<^eVeDIL}y&~hNH@(_kxLDJ2!;qCw~ee6f?XKuL92Q{;yRx z@4uI!X#Z7@A#MvReXz8J6=XawTmQSIVQDLD3$gqMKm&?dnwf!MMHtUj7CHb9PN0pY zts?}$%LTM|1VcgAATuZ$D+dSA6b!2yfj}$)KywEpC)gU-$i(q^scT~CVB%24UHTf{_i(C0L^HcU=A@BL%W{0zoZJjL?Apj0@m-ME_?2%im<;|3?R~ zMgQ}_|3?Q*B4BIN|Bqf=96)AhEC5zEG?st=09>%b7EXW};NKV)1ex~wd$oUqL1f9bNYu(86*e*P2V{ZEXQjfe9;{bOZk zWr0=f{4ZTrb~YARdi4(mbuhBD206g$n*cynOE(Z~3;;lBTQe{$Mf{u4ser*yz;oTv b-+u)HHFALd9W7RNb{1|lYHD#s3AFzM7SVKu literal 0 HcmV?d00001 diff --git a/doc/performance/Intel_IrisPro/SSYMM.pdf b/doc/performance/Intel_IrisPro/SSYMM.pdf new file mode 100644 index 0000000000000000000000000000000000000000..87d447a2c979b79ca2e385162981d4f5b3600b4c GIT binary patch literal 12562 zcmb7r1yq~e(k{|MaVt+l0XP!NKX7-S^XV1$MdR0kjRt`39OnTpC-xc2h-w{J6Ob!4$0BZgg zQ%DFCDB%P)gV{jsCCy-90F&fvPIeAnb}kN14jwKJE}oZ|KqV;X|2n5>1A`z|sRLx3 z%p9z3ESv!i zs)L=OE>0E*d))tF??K`3bSQ%Dtzgywer|pN08rWn0z>QoC=Ed*M-prS1%V&pc7`Ex zW{2sPTxrx3MPKbWccC-8rl?!T(t!CbfF$Rax`<9{r`k2-psuGZO%yk&mhow0q|5}h4` zQC+ETJN2C%Z)cQLI`v(ZnTXT_fp>K2B{m#}eA>3-+n$I3}4*Hw53M_N#A||hMZIwz-^=bC>u_D9Zu)(^)+1^x z@_Vl9oy**CHXAqNFz(UJ!tK4u?>#Qg)A)n?+jyvnNt37Z)dAG!dTV9^#G|dJmT4m( zQnnmZWjkl>^}@XHx;G7eZsK#Q|Axf6x4c7GE6uDs;RbLg z?kCunSLI>e-HD3CvnoGZ$w5(Vn`vKaxlbiz>B=j~l@{Rny3{d_1D-$Y8c-<6*H$O% zs}pZmUnO3i;zWf5F#;*;a*jN9KI(S;w&P?s__auu3n`U>u#|&z!`2uWYaig5RpuCy zZ?X0>nEUNUj!qtl6S6_^Y}QVkTUoQA;nj_nhw{p@GEcmxHetT+qOXoNM#r;~~o 
zh1S^9&bzrz22m=OBR918;^4THC^JI-6vu~?Jmy>EO9QCT*Arr}tp{n6 z1z#$;HQb-t?Z=P!wux zY@W)sus(00k((~ z7vdm%;=wA1PCM&vhTb4pH0$j;fo@5WD5@UINg*nebI=@WbhKY1m+SoC?yR>*a$9_~ zx)aSOsPAy!P4+K%wkDI!D-?3*s*7_FhIMf`cjnuMHo=91KikoU21h5K^L6de8ZR!w zW8wXlYIF1p>~@<~q~rr?skf+z&-;TKMD{BWNb_zAs-ixgXJk0e3o#wz>*d7^4-wj= zTSER7AS!gYuN#+v$VYjoLD$KGk zrt1vzq_y)fbI?Gb%4K=dvRB)2HePyN-#eFsiy`9LAuLhEA?-QN9CgFdk+JiYe29hM z&e(J;nI%yB3}@H(&2p%;cHuLU&^m|kIA$tPtjl69CN_v(Cpen8BuZ=PdnB{C+bvtZ zc?9tfg?Q$!&R04u7oFSQnV&$hZoA%K+$Yz&LM;iPSQv=!!aw59x8NHU&c5HFtZI3L zJ$9M;mPodo901|fVa<_%I^?@O#|m5QzQ!x%5LCS*syz3mB(IV2)%a=<(J5@iHoNC%Vf0TMxOn6UbN~o&`wv(OchwEXSVuW~exC_ws|5 z6B#oe2;Ar6q2DRJr>s3rWoR5DiK17Fb7`D|L(g1oc5I5e`XWUh&C1QHGf+|DjdsKV z8*skrjmUUcj%X0%bQuIr>0cW|*NzCEc^_moty<|={5sZ+yn$a&Z^Kof&n47NOk^a`J*gM1^X+uPvFXZD zrMmqWY4*ZbgO3=ByW!+=9hgKkgUv$S15fRGu`b%yJf& zo##Py7gkF-9QDT;qoDB>;hl7z^DoVuPy|9l#;$1#ZPQ9c2Bhm#t^(r+f85{Y-snKR zU+=*Kk&}Kj-`$?wZyo$TD&GV#6+QxT@d>@1RWuzv6iRY}VjeqwU^{2x`kuk>&q{4Q zx@WxD#$A=sV?8ptK21a(E0A7;<}mt|{*nZOgpLzl{H+}wyJYlu_6ZiB5N_$9;rq$! 
zDtv|XDfSMz&Ha`2XCcJv7@BoFogaR2s4u&+*$POCk8AD+$nV=e!&P?az0E;$7*yp8 zutlnuR$9Yn=b+?vTPL0U9q5N<>8*jvZ*98H$MfC~P1N&A!ZTcQp%W}*qL!kc^9j%X zs9q7j?R6Mjw8G=D|p>R^8G9cCqdtxJ&_?Ad`9-cc&G-n+uZW*_N1P{9C z*_voLw{jKAf2?`;W~BgA#?d*Votn4@G!NPohp&bkOlwHF1Wasq7iwv^YSYKvjP>#Q zFX+>Xt8$c!EJjzo8cTPM#Smj#r8>2=QvU=cUNl|?!$yA5(dmiz=xX5imBfv^Z(Ud4 z4Qkg9WR&`K=DK+nERGsR_t>-DJ&KM~PYtIO?uVW;X0L`5yw1~0nPHS*gjM^v@Dx?tm1q;VN7jE^XsQ^nHiIlQkiWsMu_K zxK)mkn~j>t4EnBpRLCtNgC*_;Nsgw}t7QMb;O8_6f z$c|mO^s4b!`-$ScpEhr7YJS1jqhBFJ@62+wWa~<-vbLEq&W1elw-hUmjPOA=A=q&F zl@z-QsbBk9V#yQS=ffo-Ss(eThn#dRTMM6;sk>P|Cs)#TF@zC+uv7PAb#>0<31ey& z>){#SG}0%@&7KP|HeYd6Y7kQ#4xssF_IQA<(6&QcjfS}qYsf91>-4Mg;h_;!sd`pWp<2!~xPBDkc6#rNV<-rhLFpJMU0YIfDFX+Vuw5)A>@{y2dv zEtE{E;%Sa8(oZc0^qRZ53Qa21*Nc=)e)U*q!f5n~-&i~tL==E}|6apCl?6I1&`SG@ zLt)=I_~O%+&$1LByv5var&fW^a_#(QV<0B#L0F-;B29uF_jjwXsFH=ukLa4SznbeU z_?L42*p2IR*Fwp1{V0lpFuRKeLlmKOW&5yJy3(HuW;}>ZZ0~-#o=fow-ErUi*lgE0 zo*vu@cJ{yg$7B0nL#`&W|}D#a9xGTQ&Ga&Gr6B`U|5mnbA z(X^XVUh{HHYEz#Q7!lx*pR(3*GWgQgJ^%9fEp!Aqq&`8xag0eN+tnsSiYcbKtd2`N zo1dX1iy(ORjj2Q?^G>BEZAs2htd@D1xT#Vmb3TDCZ-*7CQdI%lrdD+erZ0VqwCTs9 zf@8{CHgbh6tS1F}Fsb<}yQvK37rxD9QaYgN=0|~>-S1x2abnmE9exyexe2Ee;lzLp zA$7stLgr~hBlw*r{G0Iy#remcD6ZA_kkUXxBfj?{87p6?;}>HVw>xv^wk~z=tQ;?VJJR- zX7r>$H%qEZ2Ll^nORDQNegX`a7FGk|FB?p01}qpEyUe@+uy3)b=Q>OuzTr>)gQa{H zlO2YGN&*aWaj}o(O%Wxhcs3XJ5d)Pnr1_|YtFrBnIjuSaI`*BY5iLpHiqC`zE^6^Q zG$mrP_Ne`tHWC=Ub_qh$s^YH>&y`(?pd;PNf6S00VS4TfwMQPHz^y2kUR z9r{LqO030Aw=m-FvuHZ~3M zqLTX92B5anw6F@ro{trW)gh5_?u?T9iud=CB-FbL%){nr^TalK6Ym&?%Z4crKE3Sr zl+oW+V+li{Tqeci8E%xt;&zjZ3b93!!O6+`mdx4X1Y&H0l*=v7HDwIXz0W2fV{i23DOr?+Ew zl=IF!%sx~M)sKHFh6HV(k%dOuCm;CYS4V5Sc@g_30R=*_da0gIEa=OG0>$kyWKLra z0>rJSyqaLtc;d7hbJHogQCqs#w{reX>z?WO`h#h}K9?qHg#`ywD_ajQx)>1}#bjm)IYwe;;n=12*rpjjH9Vwh8L~{ZjQ9O zctcpPybNY)`orNY7cH7ATLP_~gtobl&pJZh7YNvF3rrq%>CfV)h zOI_iYM)9g%QsUfj*qp-)W>fk-Hgt{6;N5tt5C~077G7-mpd3$J1lUdbJJX?pv_lgXi(M1vg;S4ud z!!ZG&hr7LN7A+XWXEOmHI^2IINR{h7c{Q2eKK*?+h=F1ksVmU6LsvoNZfxST(FFr^ 
zxNl)>`_9;%1p#X<2S0QH9%HywojLZuj8Le85#XJ*#y<*%y0jN-N)syj>!*l50+J+B zvYTWKVzzIyhi?nXC7HGzC4JI4Gf}!5OF8v>fi-Q@HYCa)6@h&awuJiSnsw5&0~}{7 z)PNtC5{)P#i5$EC=s2#hDbP#nl6B;FC6VVD*O#Or;)qhVsf8ZGLUE3vp!vwnwZJd` zq0#zoMZR$PY=pr@e!(}h7E!g_X@V%TJ|I$2swT`wg^6|%&)TK!x(HuRIyLyxWL6s4 zCf11>_DHyzB~o^A?kK|VQ4t=*H+<&F?s;>^YY;wDH%;>AuU|y5BDcpHV#{{TQ1Z8( zBNxR0CGO40k4S!DD4N=0l7r{lxVfLNl5=lPoPAAyv`#K>s@CH5emjT05OmF)DP{O zB$wN8X1CtV_UFlS9kV=>e5r-osjb^!Kjh>n0A z(GgfnV078(_Vz%Sd~0!{Whmrj?OQU;E&qAsmRSl>aeDci^84zQzpMS6hK}-uw^z!7 z_l@aDd(0noRORGyQ39=u$QO^<3h}n! zn>#G&ZfBc=$lP=mQM(Gd1zV`?FDxuiGoS>79&43UL>`IHuF8%%Qb^AdBKi~}6|NLb zWY-@*TebMq)qrG_>Kuj`{0wQ{b>B^i%5nmimp9gqr-z+2+_TlwQo5Ok@GoaIjT1>4 z2b9)gCvL|zji-NEH8BL#b zw9+yvLRZP9+)zcx@(5WGAxk4<8H6l`kR@^{(S?=lH$)IJKSF+unCC{!^B`m(Lgp|k z@=VU5G)Ofp5=4v?sX4cYdBPeK{8W%3Nh5cmUL+W&66uDXTMf=VD5*&I^2qqq+AM49 zb&SZtiA++F#1=)Am(9a?BI5G^H_-|w=KM9C`Y_FD^pEKydeFnPfZRm-=~Yf3rBUgr zhlDqW(vS-iys3~@;sLG-d}I_fOy3VlO{nlnxHa50ztP>QC4?$mf8DBmrLuArElOaP zets+auBduq?Ph&F;}e?IXTl)d1Bw+g?WL-8Z~7eiR8q}QnZwc0rMHppIrO#7YiIt< zEf8*&T(;IXjn;KU`vva(M^Ow#<~Ra(fGy<;Nz5%}lGU`$u|!9?<7E$0&XzK0H3}Q$o^6MA4rAUv zM=;XWy@d8+ueO$x*Y?E^j71SMsX8JiodMhIEoGN&5JnD#2 zM?2He`=BV+Tgt0&Fu}KOG>JN*rcC~x$oWTZIf6^;fPTjjKFZ;g&ff2C_H2uZ`~ISu zQK!^{512z>Sm9sb0kf^wA3cW?nki>zwVdDQ z(CnZhLc9y!SVx(s;G89 z#dBx-rE2QiJ6Vn3&}_{K*|$pNC0VQF>D-)7dX99fm}@*V6D-ABHRA4l?QNu0J&Ntw zgPnPx_?<3)z=WcqM$xAlLtJxC0k@Z(JXjMkwuD6h9rd?HL~!5e276YcEW<-cWCryU zo9T&DgP%>qLov)0wYQr4g}mI6)6?AoH(z6YOxOK|J5EMihPik@R*!m%rcfxWR47V7-`(X$@3HKi z^%`yHT4ZPG3)X}>t4oNh;?2kMY6d%6#%EFd(|2QVu7mKH@OKaXBb$f1^F6KNWa?xP z9+;qA6l&D_G|bREYPI>S^jkq^tHP^@*9uEBE#ypKgDtgg*Wn*r(D6FTVaviETHA4b zJh^ggkwh-60rQuoJU6+$U2&)RLgx&G0M|P98olMl-u(X2^)}K1dit(42kPE^7euk^(lsi#@>B^t ze-%@$EA^dlA>;lGuU>m$i(4WIlfR2r)t!y^D4E>$Md=G3p81@`0r6sIi(y)2jUOUj zajL-Fz~2{rBCXUprZ zD*xbjL;2zcjp&q*n{-A7-kv0zgqI;+O6P47S@F~=&w^po^*x1qsfxFUu!;X&4|mB# zM>#2+j;1`zdlA2L8y;u5>I%(4}V>pFeN;mgxfNXfu zQR(wr0Y49|e!C>f4@4{_qSY`9&3>c-# z9r2if&u9Vh3|v6n1oYcu`cy|?-|c|4PtDi0#l360@WXv;61AzugmBr}M5^6%CKy{s 
z$&URjKlinDNH1I+aKzMYj;5zV3wh;p9uA}k~AJthmK|Cz5RCT>u zrxp`2@0;%)KHC6X)(B2`iaob%;n~6kBF@RMvT{w+v(qU()W^;~%UN(UiSc~@yfb!GC%E@IJ z%xsSx&RajDY<{F#ub|OxDxb#}klV!2NSo3KycNO_EpIO*5m4zBtAcgs`rEg~Hp_f~ zpfRC+ah2$_YA3j|N!**n5qJw!!&Jl~lJSf}z!xu7zNAgQsm{5NFC*soh0*+)k) zKajI~7v8I@Tv0@<{6s)o>^~lv3TK76O)nrxo$vuT{I4iX$8K>L;7}jbC=sEV1=F>K zC1}Z0%!6~jMTZWVQ$F2{K(9_xg7G7~r^7=FNVRt9{<9aV;v#%%;Qf8>f$eUvxG#dZN1oi9vv$Qwb1y&hz6#VKKT<2%qZIgq*bO72BcM|4`YhfWfID~ zfv|=fE$#QzFjf=2a4sB>5stcfh2+=pWKF5|7l5%Hg8~(~rK(rTXTJX&{0jRx3h#!O z7)>Bj#>E89Gy=-$nd}t6CaU={*PZ`hr-TJB3SGeon6}_nBMV@MI@mVU&?||@mP;5{ zB`<`H)$e9ql=0mTue`CY7TG{oL+_Jyt6pXC8o|pruHXpzRSg*mS=f@_M<@G%iY~aGWWCA`;~(hN~MUSCT?P zk&X6Yk9%On!EJm&VOL6`U2i2B>17WsYX4(K`GpWW&`)tM36Z$Y)+(Z=WT`9)wVyS0AIPHOE+7 zJ^x`fMbekw*>S6mI?Grwa-8kJm6>1HoNu}kY5{()tS6ymk9u) z+tbD~m6#6fLC@lMmN9%bEaCTgqSdxU?NgpZiD$W>To^_o)u3ow^mR8wjAm0oI#&oR zZMcn#R`)l{wSiZHnUj@$Iv43&=-c5{&liKD?p}la1yd>G)z`eWoCbt4dEn#Uoj0t8oLkJ!|%Uhst zSyDN1S=`;wXu^%*G(ndaUfXFdRYpKhbFW@wkGI$i<-(~{xlPdfKO_~SYWGc;4PTtQ zEGBQ1!>P3I?V7^sf1{6K-n%1s9)D3L{$hMQ5IX)rxcHm#@eiU0P{9V|3@`vZ5IocY zMhGGZ)CEE4-~a%Xz#tnlai|->fE_U>z{LyT;o~*>H%`a{r{ix92ZDNHp#g>g41fso z3&I1~4Q7N1lt%DD0761QX-)vggAsKEuY(J~@xX=oLoNYIxxr*KU=K7HA)yD$2=5T6 zlZJzt#UBLdtS%!C;Q3qqUv!iQ=)tuCP)yRRD{T}~P)Wvv@ov!m<`xt8^t)aVzz+%lALm`@pFk?R?B);B}q|#uGT<^cN!QaqeFRAFj?au7nB@HhqpX zB_rKX$LRJ;)Wy3vV}GB0GD(`tmUA8NL>#1&9B}2-b&#D6F_O^^ zvTFTZP@gFfDY&f31sCo}UFT)2-nz_U#hgDoYp8&nX8RJjd2pSE>my;}HK5)!cd%yW zb`s4W1>=w~=Cil-$dPRA>|b`c!d_1cx}-tBccCUBxTS0Q2#k@E?8#9CD8^M9bP|1h zoG2dn<}xkm60hYxYpOQx)AS|5V4N;WHZA!ZZ)zl_R|EhIPZ0Cq0UWDc2KA~hWGFEq z1du*`U-|V}%!fi##&2jYUkSMVcpaX>d@UBa`O(>YdrTkABCE6G zroMl9h}ruF-#?%epIAKz29W5aEDO+$pjbtHEJ2`38S{i*LR=Pw_|48&^>38l1Gm#H zHw`wKVB~d3I1<%g)vO7xi9OI?uw&%>)Gr}`JT5m?;eQh5Px`a#kdvQ=?L&2QtOKeS z{=UCTOZ*~%E~cn&F2Q?zg4Y=#>;xpgq{SoM=r9Oq^CJkeP;A~zM^IZTzMSk8z`*G}in{H}(sn(|F#RBM#!E#?`4lC*6`-%$T15{%_FSxN|- zw9eNrgH)U4n*^u)*TiUk=@y?k!k+mkQZf*iqm+}CQ;dWO1dz3VHp?m1EqFanxf6`j z=2T)1;i$s&#P=lhd@L9S_j|DnbEx7sF%(k%$lR*6sPDk&pyNPPL)JhzkALtPukGZp 
z=Lmc$?S<$4L^LdomWk*yMv=d)_$xEk^r%5;euWwZH!(WGvyaNH4jJ!j0(IqtB!yHc z>6_@)WG(493F#@$i`z4#(JnClQ2Y8y_thh&!>8KJ zqfD?@hH7p(A9c7jPc;n-5{h-x-6S1lkManOyJSBVwyNo8Yi7bUw*rSNIJKsv1vCXD z+{>LpjvuedJY{M7xGb@x7R+?RWW{8m>Hl2?tWi7aGAg~Tx6O|u!IkrPP~9fG0&4A7 z@sc<=I7PlfK2<)n+mZMsRXW8G>gLud=e60j%e9~utCosZ(>B8x?-=v<;<5E_?%$5a zc^0l#hmy?Ka*2(sev3fNmgva- zZ=h3-92k$woeO>4S)E@Umq)vYnn!@gp2x*0#x=_w-5u3+&T~PIc)0G_tc`BQ;BQN&SMIj zx13LbBcFqUGpLWM_wjmtUl8jci86_J2wDh*BH>7MVhaQura#Q;$BM;T&yuD`rrTy< zSH4|+ZEXilv7fVscLbUfS!-C2Se)b+CbCXcwVzKL9vd#y=5M?kfZJQycT~?G)R_F5PFQKh71NE(*gF(wgYZ`mb3pxXQQ`KXx-2?N`fQ6Sn6+RKa zd4E$N>m$#;Uw@zXrlUF57cQ3d^|9EP7~Y4z4^lscX&q!uJI^EPPjEJ{He3Tf489-S zOTYN_xg{^URD$4S(pz`#RIE4{?u+pEo}5d43?7C@GiWkx86Ml<_6g zq{FQvrbE`E&|Ih8IC$hEQL9Ub#Y*PNm*rg2YnIts2ce%L>HJIA&sU_}L)cixb(5Pe zFA3Y|lj&s$hdyz9V*PX=ke-aze>(g=yHX*y2C~n#Pn3!dMTbTmbOSe3=xP~a8HMHF zDb1$}B`&9`M;>c7lW1b4=3eKHXE!NhDb1vvzZ7O@6h@7YOmNhcCCI_J;jH93;S4J8 zEe|fw>Pe2@GvuYmB;)~0Oof!yw{cGmjJ|J`EHmazT8Yz$K4IW{eIc9bl*$e@t(`hn z>R~*FpZqZP)&FTUWboCXq{3BtErfD*amAQQeW9yW-h%a6vO7tnF;L<~D`#>r)0%n(}&DvPi7U_SV+wh()7<$3gNZVdp|% z1N0CwICZk&HITA(u`B96e|B4n)lV8`3^LlSTXo2DZub7Vf__8u8jT7~Fo-g!4k&1} zPnOE(wH$mCJEGHAoLW!fus7U!8@aTjHUXWNt)Xq~s=wOQzAoITcD5gw)pI6=SFffH zHwND%T~K&)-<29|1B6AkgHm@Q=#1^+(vZrG%~GHdhimppkfBh$eEX9S8G zU%K7OjYlWuwuv(THoWOwopSx*cFW05EXZ?jczQch{Ke$kkICP|```oEg^CIhx-S7GYS0b+3gVF((vs&ub{=Up#9m zdjzG~#vVbmez>;&OVY8i7qfS^`5V9lO4?Xjf)V8E2lli9fSU(sXJhZ;3=rT2I=Vn% zUKdngP9wgdp3{sd`(XaXP*vwy>_UF@yQ zoLuZ6W-c%Q5NZXr2iqdNnfGv|8Rh~^gks2KOC@%Lm{C5 zhZi>|uq7rtfP)j0{hu!YFAonF55N-ePmB}M*gU)d_W#5 zLi}z40MxMY03&h$0Ls~0LJ=+EpN38y3WWh4eoye{ui^|dbAtWJ76&IMHy0*7y_AYH G=KleA_I2C< literal 0 HcmV?d00001 diff --git a/doc/performance/Intel_IrisPro/SSYRK.pdf b/doc/performance/Intel_IrisPro/SSYRK.pdf new file mode 100644 index 0000000000000000000000000000000000000000..48c968e1a9a05b243926f766ee6c51f5594a7ae0 GIT binary patch literal 12898 zcmb8W1yo$i)-9Zb1Ofz4aCd7OcM0ynod%kwacDF+1cDO??(PyS5L|-0djxlPm)GQ; 
zbMO7m_y2E<#~7Pp&%J7`+QsOaYgNypQI?Pbu(5EW(D*L5CP+K!6b8=o@b}n+Dl!dhu{0)Fo*6_)ZfI?s<(8qB* zI>G162E{e?8%VK=h9~NGtD$|uZ#Fy7bv!OJD;wt1LA>{!ozG2|o+6kQ|6Js5bM?Mk z?E@Csid>Z3Gpd%GbgwCdlVACe`sf@*eZK*k4`N+|ZC3*OFG zBLi0SLY~L#Jgt|f$5kVwna!Z-hlgFW2f5Z7hpV0rn2*aGi;wHe1jF%%`(wN(RFA z*49p11#eF_^p6_lx&vu*xyy1K`W#x!o4B1+n{XKjYdi*~9nf^o=QrX@K`F7}7s9o% zd&!}(X(Ir3sX`!;PYyu8hxOBEr<_dI=^bd3 z!hGE=hj;VUR$|@n}48tb?W$z9N%I1+o0nVT)cvHiD&n{H^bYHqL(aB;l8^Wfpu*4pasc(vTMlW{wO2Q<9jKfVGg zMU?EkOlTTQm!a(hdf)H(++2(lW{ddT6^Qtp?XB(bBs|=2Hy*TH$6JCLnPv090^EF? z#0wRc2@0Lbc|#dq=Z39UQ)^T5?#pXk3l|41_dDo%hOtwDuRZaR)*^HduqiwT~qrUA&zcI` zoPViBFUAiwtO=9#68R2MYfYG__=43Lo(4pk$6=WRDw%(7{s!KxD&+#LQL z_a@s@X+|L@pL>4`iBM^_;^i+iRW5`f73fELrIg(rsn3}(-9m(ybn~Mx=Se=hqWM;C z60P%_2jmj_r#y!8FU_H4De0bOv$EVi?!gVpQp1;G`yMA z8gw?|9)J5~OLsKs>F6L;Q{O;lTfcFdbrD3irT{oR*hGrSpp(;ouVyunq;nKdcl}&C5{)ZaO}%>bk75TjqBZSql<*9)bj_$cq~tt%<4Cf(@y>=!8>a~U8XZ71kA~-- z*LgsD9Z`?IZFW6M^WI5hN_2rjx}y=2T!*mGcZ%jnGC4wXWbODfLkH zqNp6-IX8SpQ_v-o1bHEgXr5#udvK-e6fGyhqpx^EI=hzX_JOniU8DwdeS{s0h4blm z#YGk!ehW}%88Z(Un>nm{Ap1=XlMY&>Ks-e}hAGl_u!%-xByH-M{}=2DP)MOnFXItb z+2QfiuRks?<*c;O?Ox&92@$7 zxn$7y;|kpEYC><4*V}obGsV)%8omfZfh?m_l~4oZRr*lw0^%9NF;{Gl<(K(Kt3)f{ zo>Fe3*A^2m6PaiGESdMdH%^KR9I^~~t9ys8Rdn%nu%%r{wwy@{`jW{;@;5rG_zDmPjgr=_;o^@co-}%0yO%9!oB0SjL+=asx_(laaQy@y z=aI+>&!bljYYW7FX(C}U{E@ZV@iC=OZjUG6V(6VF<3fQTBpyCqI@%b2UDv`0fYP5a zB;kFG`?B6xh}OFx*=yMn);HsVb)4i}7}vXl`8C-&(X&*%*X%s#M^uxD{})rAM9(b@ z8ZuarPVyHLIn8LzdGIvt1-ilXOlO%Zaqd#5lKlV zk7RaC!=VK1%2vEHJi_%FNN9d(e9t4uWu&7+LK}z|UQI#_Z_s{U)dwz1io1B1Wx0;M z{fQS8vLuNh(AotycC~zcy0py724~y7#mA=i%E*%5fU|I~q@(kZT@4@opjqB*!yodt zqpyG?J_``jA`9dZeeUCh_0h1jI44z4sxp$T@E%(#^}BPq#xAy0k%qKRy|wa>`K9!X*UGWX+PFB(QbI453K4L#GHV% zD(;%jl7Edk&$`XKD-OGWBu5thYHPo_{yOHhwn(2ZX`&O_7#Ggopw7Ew)2FNBds;Fl zJA*8=m(f1H8zXdMAROyY#A1wb;832ooEfnfN2ppcF2#mYiA5bgsP}7~^pk?w>&Y>~ zKx7$R1t4hmmBPv!!~o7`yRx}hDTxJh^t7C$qGEg|hm0}}6%@Q7qS%B1wW&r2-8}w` zH*|*?#af<5kOON$7FWDFPG9DmGd zci`gKoOc`Nd{<0+*b>uw+fIqP9~wDg0wu|_|fYi5%i3_6KYFOLEDi_J=D!;*2<`RjZ*?= 
z{&X}`^wJ&SHlY}YX_Z2jI31rL=hwSxhHPZ6dU&W)izDYBm~^aTjNz+)xFAum!UF3I zTd1pS=a5ug7*4>EaHXf;<4U`V*;S(IYKO_PtifO$>>${AvUm+~r({19#`sz0JemnD zmz&{#okKs91*eY)=6&I7dh5yo-d^j#ixAX^%s&(~NpBEMPm1;Ht1&QCx0*zYZL;1w zEl{9NX||-D6utOk3xbEnfcSlnNnTnz+$|8PQI=JPx4ThEu-l#&N3~R18cD;p3ob->AvYC z*_jl6B(0p_1pgkEDbch(gV!Vjffj?6Ni(NtRp6v+zJ6mY)^5fYah@h6!Ookt>(HSR z2QuH^2U<^k!2*gIRLH(>-ml;&=4L(DoTbI+cuj<7KGwq>ubUi%qn%KKP|-f&Pueuc z1fECVYZ`x(-dBFPOrNrq4dU)))em)Jdg&`UU-1(68LD>Cw^tUi6tucJB#GZAH}Ti7 zE%RN#2-d6|GTbqgH>3XFXPf%95d6PSUNVrrNPgb&>UR;JZzX|12p^)jyyAJj=t1;k zFtA{dK5)cD3pIX2^2U9-Wa7s6xHqGp{<~we{=4S>??Ux zb_f(R+666P(={=nQm6=(|1t-MZNVX+y<6(OCtbzx1=g=GDzoD}$O@~C101mA&(9I1 zQS`i~MxTld)Lip7oh+m;)n!=xRp-Ufj&LU=X0#ayc3B$xz3TS^$OG`j7{mux^ zBAzkt0nfW9f_9X1(1?XleTzIZjun4=!^Cg(F?)tng}#@dm-aj#PGnREd|J<{kT(J| zqwSgP;eYY2i3*~j!oy5*uKp+7YC10mX%gK#ycqoDVD7uARo-elF#;0U%ewoUmb$%$c7f?$QDkpX$+{6!JE<+ z14Y0(%F;>gMx*{M7wsg#;MS%++$ct=M3wiS$Rp}Nn8WOxt*TrMg2ubar^ zGsvt+Jbtd2hivrd_LQCT>MmbZPFH3KMZ^mbn^7IAEGb{#Y7jJ8;F)d{*XiUEPl%m6g2b*i>PwX_9x0;qqz-R+JM9ZW7r2CgOW>JUO5H=nJr#0nw-1kB zg)vy<7?%zPh!?!=d$IGrZ0^%r?HJ1qL(C)55(R%KP+lfK@;y>pHhP5q)~q_-`70>Z z-YfJ@ATikls-ail5Q}`qdH=e=?K3^>(wE39;mwLFvW)}t4RB}kNFE8D&uMdqn6%@6 zR%V*mK9So*_kLdLe2AQC3MBer^u+Dw>e(!~lAEBW4jny1(Iz}i*w3X0Um^UlHpw|z zG{XCQo7qr*!+hLPDdk>wk9{?UX-PRov0G(v`oy0ed7h*r!GEP@3%0m|-Vt6IUt5IuTO`rp1PXsC!$_Py9D|oV6Icm*8~L!roew8`M<0w^l{$RQIGU7x zt&LahF#TEfvn=j>4@Tqfm3Qy5u7mF1ieR)Lv`3MJw;*_OO5Wi|R7tqkd8bE_k=!x+ zUcC_xWuXuWy$A_P?9=yAsM0C$MrW6`m!SSNYewbj^5wnmV;!~FMJat`_Z)@Rh%nHl zr()T;I~zWNOmCKDCh=ZErLj`Nc{)w;kjDXxj-)M=AERA%-$T0^S7?Lu9{VecT=bW1 zwI4jgbK`6hh|hz#cy$y)tarb4J1$8S1bjf4eti(3hh0_mI1PmvSm#;|m*a?!BqETn zk>sVD51aMmzQCHj6HfWT!@a=8?L+nX*$KAss-^R@k9n=Q$S!bpnUIrj;;$7odKp|P zN%kBU+faN^PAR0|1?Q~vn0aw{_|9$$wl*XK%~ldhTQPX^x;>B4cS|i;pDQ z38&R!H_R-&oA4#U4z_9WUZ#uhTzM#6_{&efy$Z#>xP^LuPyFc0oXvSI3wehEx}cU) zb;oNl`B^!MKF#Xx*THlI^t{S~*fZ*9qtURkO=})8WBa}kc2;ey?%`LcyuMekcg4Fx z$!D7->0iiW^Myw6w>Q3^sn~@PC4PUV{;d-^BODt$^@iBgj5b7*+DQdjVeKZi3gh6d 
z(Juo1AVL5mQEcjW^%f;JY=H|gqs{M>_L;`hg&H!-e+;GTXDcxAx1mV&1j5Bqmh^vC zei{>HWCqzM9ZWoN!R4cW>=L$iwZxW{&``e;C|M>*D*ORAwlFot#FE-pbJpW(LQZty zVB>%l&IUYAmD1Fc6)(o;+&7b}@)?X2OZ7>3UR-0N)9ll-aKZr@;33sxWBb6$#KU73~j+zRI9JkmbgEc7}e89#A;wo%fDioi+eS`!+rIDuS9P}tiNi7)nN#d6DY?F2+Z!?i{M1?oeQsQvt zy_D8Jj%3hZ_0l_uWJo=h>1n6INNSg433tA-zCr2}51hEF8jlP={KafSv1VpQfm$jg zfU5IqWlpEpI9wz#KtZamAQ_u}#!<5}@b(;n$R1<)7M2n5g6)QQ{BylJ1g2=6Rj0;c z|BYpa^(R9@qE3R<%e`26!7l?`g75ojrl-~juld)od$t|Wteoq+3zL6|X%^@&D}lxST2{$<^c| z`H8H)U*kZTvgZu2rM)uIWfcPz07>@ca@}uCvAsic->eWb7K*zh_fBHtJd9s4M29kk zwgqwpmLYAYA}?$DNu^kH72mewQVmT$`y$E*Hw&ts zN_cx#wz)JqUf(49YRaj2KCX9}nWaUxeea~7;U#XJsU?_rm{*fp4fR;VP6{A9ls(G+ zP2z{wK$naPH!sJxPUTgUid<8bIsszidhWGK{6;=C`B-A7cuj?+{R;WhK!%H?7CDY@ zhZ_+brHRHPlkG{)yfpFKLVoN51c(998t^3V_R5 z)q+2EnXeYerU#irh15{V!l;K5Hy=A2Mcb$I4h?3{a(pw={;Jq}iUv&5Jxnq2h=N;Q zgq-b{?=Zp54*h%oio4)jWmwpFWvgjCIfK!!)rMK9pn(z&SMg)~O?Vh&@x-UP}h(Bw%W2EC{$VQXS0Z zGU!IWkQc7ZY>-lo^D%NH;0;9-X`4^WUOwz^Q?5oA^HT6zCYwdczExkFzwE*Z>BC5u z{&KCTNn7yL?UdOV`M{Kqt#Pt^o{SZznQIcy*iwHfTJ+UtJ?!8Eh~2xm@b#MvIif%Wr^7Gc%*Z8mYW4vF zO`55GF$s-VGM3683>kRQLI#fmrn(RSO@6!KLsbD|hzvs}V9F|eFxQ}-(Z=(R z4__~|mMiYHPD0LF$3LC5$|l@vjTaIsWv#~*FTIvy)5u{^Xn3wh51zd#x$2rX-0GTN z$Q`(0(%FpVwo-Iv1Y5%0JSiDKR0wHbFdXE$zZYY=zh?w37|OJ_v?jak-Z16OUhr%k z-^G#d-c8*?4$7U34`AbKcMD191Gtt$^E8J+zX^5wZ4a%A+7+SaA_g>PBPB<(D z#CsYdW++_b*;HB^1ULdcN}*tnVbNP(RWB8hCawH(w}X2m9511zfl*MK^r@-#{ufu7 z<8PPGZQpR#YBtFp1$fR{g#x=uq_%=y8;0VA8yvA;)Za`moqbV+*`96Pr!`sR~O(c+B&JL8da<#&tNQop+hTyxdj?^}0f zFBZsNE*9n8dBsq^Xz8z+^}GNY8ki?HU(UZOTmZEsW^ChFuHXTEoRZ!z0SzB49zxac z??X_{aZZ*8W?C~wr zOY}~kDG0j-VQEiauH%2bNw9QvA?9)Onc!g_yZ5wb*j%8c7<4OKw1^03}J1-F%Q z3FERN`ZzyqO7~FsXf&25Ut~Q?B4@W_k*O-3R;p5Z?qp6adsk=SNHIC_vkBeG&q6z- z9Ycq<&CCg!W%JPC?kDp$glHduN{vpx_$rx1SWofp1Jt6(jgn2pQ?3jN4KFofUMWfb zXsTI6ZsulC?jjF7@Rg0_k>~yl42%+QeLYwCn%L95Mx<4hy7Hy>;wJ0|ACFz(;D-Vm zDelrm9K-0W@2a)5;wtbw%UL#%a-vwWQgpMd+~TWknm^@lP{XYaPKv5*Dx^lfQ{Ji_ zE6I;?0hC@WLC36C@wIU`&W76FuyqKTe*AZyvy}g)lNder#!h4!bRreXe95z-o$#XS9&>*nKo@~7g-*&gyG1k#50NKW;h18Zm 
zok2}l#eGcl2A>lgDAuj;Ye9HT)NAB8Cniy@swZdk%-w zEMN|&TC<8S@w9mlIr;PQ-1&2+2Xqg=HCxwOA<>UK!r4M?$0D8LjA3i3_wi!9KXBA- zwWu|O4`hme0cDHZ9puM3caw8120fyu85`+~2nQ7VAdLmxNL4)Mgkfg0M4VKej<)ia zV!<1rgzEN@24GP@h6{KcMZ_JVDlWMDrL4B4pcK9!U{QF-!-$YbQ}mZ8dko3+;2->Y zfAKo~9IE`Lli{JYX9-qGzAHkv2KnD5S+bt8%*gmM1lhPpq3W;B4g(#0)x0+=lGc}+ zN;|#^QVZ-`+jyjQH(+L zG*3aKMB9OAE!`2qres*6`l{Se2jQXvvdYhNmaHI`9+yXQW;%1Jq zS+zNjs=ASIggfsx%BcKxoBzjUc7on_I>TgGr!5J z&p+n?hym}HMB~s{+T7|S$yI)1e=2}~3uhh~yOlswOvJKCZ^nqeWSL6L23HX*V|R6$ zTnExrlpUWrklIdJ;4>aez-df#&I~qU~-m`lM&5;ydrvY(CF4r9eh!}7qYG#hF5f&pV zuXwkwA5A!9g`6y2^y>&%v8|xA)7pFZLFy|_sap;w0gCstyogxm8MfdN2XVT2tg|n_ z%>;hbi}m_ZG;gV<4e$76tvQDYXuZ^7t!|Q?Ic`9-yPwgjp<>a&vD)-&gMWZiiO=hd zdcEj>vMo^eVipDAQ6YOb>aemGG}e-E0g$tpQKjM!ZDJhQlRErV8W7w1aT{$35tvW| zWn5z7ySTrgen{*UOw?jFYS9m#6nER<&Z%}j9{63XRRc*tRc}!bc5<;W7+3D?EC&tt zHw3_vIkXaxqxNaLyyETjsduFkZI5hZCu!RE{fnHMU!M;b>Tn=h9PX%flLMge>nxT{ zW<_@hKNtMP^c8y|#?n_MqL1ohpF{~?S#^__PJI%k^@2en5gUYm;+w4b^x~f$J}||o z!9^}6e@RgOQqVjKWd0>!`CCErFX0SO-onI@T#x)wFr!Kif(vV4&TxSY8#z!BYGMHv zgSnFHvBID6ad4A!@ok@;H_1DEnZ)Syn}dO*0C2p$3I>I6aoO2PFtFOk{=JY5l5)^#&43BONb5OGbL;fH@M^$Mta<0G4 z|0C~tgdRid0cFL-#lVhG6LRiHE70G9p~t(wUsv=|p=0|P1SkQ0ZvlZmIt9Lii-3TC zNqN}0__+T+r96*g`nQxPE#5_4Z4NhRq?xB#IT|xGcGSvT^_dJ>vrH5VBO5ZN-1o>Q zl-v^Pb`mO4`7hb&7eC~PM^#dW20T@%+<8tQF==o2nkZh@q?BNx$?ae$Wr6I%+bW}Z znDqLRaIiRmr~D?o_WrG zvQEOe#A|LqC%?L3oDlic_T(WWwqA#l?TK>Yr;X#wIeh}WK3*ZMRGM0)zQh_%#bH*t zVM-l8cFh>aV`e?i^CTz+-wC_FAi;p+@G|sp^%vbrI3IS)rzj%=yp5Na_3SOQkdN^4 z^=&t7wUWT|9rqLZ=wB@Bj%p^>07x`FyB}c1?a87{pW)#+PBz0lYd0H$uap9_ynMW@HUOIz z`bi(qZA$dT`*;I?pM3%evdfNhE$<|>Cl&e7Z*$i{HWqK5fOeiLH{Jye83CV!er<6^ z_p&E#@H9~ETxK?7D*5$GU;b%^6$5bd;5q}{N8HeJM78Dj!5Y~0B!)NIiA~&q$JW$6 zU!t>jc-iji)p|zIB^lDaGbt|CEpPjc_#-HkOo_b~EC4@1<3zaNmnd z?Gpi!mSC^LRvF$+zSVukN&ZCgEyDI&qD=&x@6T(#W7R#uSVVXxN+0~(-iU3{?{(x; z339yGAHHSc$NH5T(G?;(XJT=F89Q$y%S->6EyMK zOcitN>o@L5RIJGPZMqd$)-S4zlz3l8_~W(p9kTP1vG~<~kF!JY#N78+>PT3`(nb;S zEx`JKiN&8C#)^g8E+rP_N{x&~Q4)!r`^@6)k4Vy7vieY&sDfU-*NBpmY|09;sQqMb 
z0+zGmvoy1~bBa}vl+y}@Cp9MNEm6*}6s4@PhsK6CpExn|6QqT)NNHuJjFN0tZDO7B zUcW*7m<9R77J=`hKur6l>RA;*712ZlUjRY(CvbkH_80zX;+;^mE{6(ZYqlB`4@?hi zj~4iRIc^t`hb#M@85MeKH}XohrI{>N!s8TY=-%>>6`YeCmASZdDFp$1m2TUo&?F zFN-g!gwo&8o6$qm{fm{LYW0)OlTzC{+q`Jv9QiLsRW0(WVdfvJ8Qz43rpZ;yrOSo) z+rMET$s!s<*xWj0zXo4BUkhjeG?X-2wrQuhr7$DwHjphs5_%-d%HN;89sb#)DK|Go|JwAOr z(LY{1&YyFpo>I1#l~&v=e&c_hi>}e5D4kg)zL7f&2}J3Li5f06Ic3Xt;&Q%orfE29 z_}IYV-tDg99^k&`esPL?&3s3FM{?bKt%k^tNP+kQu@tcu?I@7B<8=op@Gh`C{8PB` zCvJRa0wH!y4s_OG9$T&!D=}_+9v#zhBZr~akbZVWj^8%!bFa9oc;wki*ofGJhDZiq ztTzq?0Y-7(;fjSJh7l=XPsAj3SVJRp#{nM!sDMW13>^aPET|8(5m{Z*#An zKw}(pHS-C`Nl9rEV5X+~{D=Or{z84p#`_U3TQl39+Mf`YiF8XckyD0e+(&M)$HV+CO!~2Y8W1AY-7G zBm3Zkaa1tJT0b-x_4&oWD5q?u9H2aqNRp7vcF%c}gOi<-W7y-`6Wb#LDK*yWHVB;v z#_4qKfvn`Lq%0TUT{Hixw-am=&f;CVrd*M93u9rP)=q7?yu|LJNu`m-9t&X$0fZdz zWu+nxpN@aX`zBvdXT8s|kCToBLxM#g^aD4PsOxFt=!E3nE6%42CM~C{Mjfkv$5ls7 zFSst4&TDyxs`xYGoI!}TSqLE^D$!nD1}h)=hW#7I342i0U{z>U?m%k7o<27X3N{y1 zd@k&3V;AS#$mEAkiLVCi$t&?{F(`ka-7Y}!xb>Yw8AM~@lbr$>i#foI|t~7Y^rSeR23$S(Dmdm&ytvExkAJNs&ioq=F z5zRr(rRklYziLV7wOB@gWIsv2Haj9gr28Tg1QK?el)+jmwP6+VU*&ZP^yffO^DVKX^9il-VpDQA&z058_Hb~!ca&flx=lQ1M#%cB(RNB4bPFZt&3d| zxB0W%O4MPz2m=$)Zo{fwk>hvo%oU^?GJZr7M1dgUpa!4-XrCaR$8$OKCT>EjxjelQ z*KTjT_cm&2M`Z>!FH=X++}C)usd-(xQR`?s@=M1N&#QJdeY`pJCi#NMoAa&`v`sD~ zye-6kXw>#m0P?GnP6YaJ>fN+SaRu)3AC@?w`YGK*>qQ3?F}QTSm7R`BD(DhndeFZa zT%B`ib-iV0eIvkiuYYhBjj?gV3$Oo)@v-s3U*aDaI}iMb!#^+%K6sh)4~&x? 
zeiGq-Vyx_J|En)6{9wjE?BK^W*#9{eIKm6B%l?NyHg--Pcro}l=Hvjju!cIo&w-Ew z)hyhh@Hrp{%G#R3;3eXpf=(3%b0U8{pYi9v;^+i+aQZV_Z0uY-d?++Dl1fr2{|A(_ BKd%4) literal 0 HcmV?d00001 diff --git a/doc/performance/Radeon_M370X/SAXPY.pdf b/doc/performance/Radeon_M370X/SAXPY.pdf new file mode 100644 index 0000000000000000000000000000000000000000..9db0c574c1e2044f07c0c4ca18034ad2e31c7eef GIT binary patch literal 13342 zcmb7r1yEc~)9w<2TW|@o5G({(SlojXoIr3{SQgme?(Q1ggF6J*;1Jv$5-dRj!CjKv zCGYos_W$=*-K{z`r=RJWo-;$*9kur9I0hy%)!5o~t99*1WP988fm%wvOkdm$G|Fup7?qr2%^%^Ma zU~FdrhdKgPoSp2Poe<4b9c)dVp)iMsS}t}lI~M}`W6SmddX6b7iV3qZeHaD!pQ8bS zG~DfAK#-)dld+YpIS{03Yz}h-av>C61OL^4?|(FS(0kB!fY~_xLmeb(YvY6v9fACR znuA`$9BrK)posps|Es?Tg})!39V%Ya$EzF)h-DvHRBf&$&`&l=wE2zvLvj_7|D_LnMG)rkO-+YZj8+<&Dlag)Jvvrjs)_&VGlFk0b`3rlE>pfF&vz)Dw z(XWkDRGF#{x2q$uyVKK|D#LD%){5W1_KJT?_*!qBrf7BEj=@^J?pXAFlz-p!Z0eQmiD>NR3KP zRuUEviQ7?)c1fG|`q&qf+*1?rBkB=ESs!N6&dr9<-d)+B)@MX$T@`l!5eKGC5aRiv zGn_Ml6wBkP-l=?O<{Ll3_FyW>en|%OK-LkE>}7p!{dBCFHL!iw*XQl1sLn?e#nD11 zj1w<-^KfmlO+z+_su3HMB+(J0RCEoRAdUH&4nMcXmyE(1=vX;9mijG*yQT0k|LddA z?bO3f_Odf5Z*MXe-o11666Wy=+wX;bKD`N*eS6bEHJsuhyCKc(BbIpdHKspu_BywM zCVSl1t04IZ?(HUj5@SqSx;0|FSulMB7v}zjc*#1lXbbAM-5)UjeXVX8j~BEVi*f(e zc&0%{__tQ}&!e5$vXlu!-?LttRl`;9-@e&1$BZW{TMOCkHy1nEzqd~>CRAd|_t8@n zf4$_qGdJ?_m-o5)`TPFx;>*a@5&Q3-x1ltKorQu6^YX7@i!rt@9MFTx>*vgFfslA_Q$&#ry`cEQpg zsofr?SVW8?Vy6*B*%%gA^KSTwAs~Uz@DM*rH5BvhJ4^O+(nE^c!C8T%eG755*3-8p zL^={7>8j*=6{l4GqV(M+`p-JiykCX0;(23wV_tdr(dw-0u?$UR5uC7HE1=3fO1OER z_DhjwjD*u5@E%5}xT?x;dp;$t<_WMk)tvlgD!wLx6IUu0FppHFxJJWb_#{=ovfaob zLc6S5Iv=jk5RaAzqu>zGN1l7tTmJN|R#d&>OaX8ICXQvI@V!X#lBg{Bd(*4R-t5(Cer>|`RVWC>q3C4ZjY`&o{M0X5EDM{TZX9PwM zkT()l?K)Q5;oG~`0FYexH1W5Py^rVJ0an~VIpwx$e0M%_$ujKZrBzsEpoiapOGi zr4o+KG!3Y3Q%SpjWv=tkyyX3oIFTDk8S+mK#nocrN$k6Nt+f3d7xt&VMjL{YcykEz zudXY}=n#mxVW6d^-u}yNvEQoZSU#yS}@(YUSO&Bo&l4k1JrmV;7 zYL+zdo^{rf#BS!nN19Y@n!A=wC6GSkg2`SEH1rCv?QUaC2vN2CF=jA{MznZ>2kd2$ zjw*2l1ts~Og~g0qs^XEF1v2?QoAwHBI!c8g^dJz0&ZBLcvs=Et6|R>ZxOQA@EGMR& 
z6QJu9g;Js2+S-_-^$x;dWZ{qPC9T^P(a>s%^~(cUGoq1V$xj{IJ|Jn&^<5AI;z@-n zO1!b&4W-3ZM%AoXD>m%@0nl7N#v@_NHS71%>yj5V+vAcw)vO*aJkcHd_I%d3zA+V2 zc|5YLgWnt4*dJ-#5m52eWb9Q;&9b#(QH3=Sc5YvsX=5?O0%K|{$FOH4Bi zrA3m01tvGngDK0psclm8`1#rnfCmS8z<>rhtskVNs%1oowLdZ*oNLQplH1iorO8y2 zzOTjA4-JJ?J!e9IM?Gtg~+rvZJ_s;t~$@}80rccAAx+QeJd@rX=f3xs4dQcvP#nC31*-R3vap;kMCNZS-VzhjhCbTiL+Q zPCA}ESc77Dz4-W$({Yx+6h(qKX`>a%NYA9EUST1h>70$SecNvk84pkycolCYz83JL zyB|4-bvTyUpnl7-J0vWN;2H?fnQYSdJUS5eoVj|UUjVm@{pEYzCvoj2BiHh40XY)U zVIqK80hdqyk+N|W#g~EM@(wJpgrATyX)s&H4>W`EOrwlTmb1fV=aF^@%`v7whf)WG zW*v-^+~ZuH8+ZHAQyzxcYNOl;C7z*ju%Pz|7Z;!wWY8$jD zf!YV@0rooT>^7YRkS88*p+%zL3uHW9CGX~nQrsq^*EHxic9|7&nFFRg2Us(q?^niM>mhj3U1L*F<^g6k0Jd*tfbi6?ZwJfxq0IKw%Gc zz}`zSjfAV`Ge33&#|&YW$Op^r>UFf%Y9%!MTX|}@DdkZDO@G{>Og#_5puH#1j#6p0 zxB?0C4^+O8d-Z*SF#i_a>7m11efCnusM>B9#_DpwXiJ38F2e1Ng9nyHCj?x@{8qAq zUcI3kAb5%5N36_t`F5NHgM#8RJS&atn@qeZbX&U~X)!+B7$8(&0)Ti;Lx2%@fP~`M zio2Z(wee`E7$ziotL#Zi>wTddBUN3kJjj}`D;BlLC$@O936hy-G#k)Sh`T!JV2Ua} zAIpE4nTM<`S&&v74$xk0Hi|gV!;lwbvb===HYUj!i*?Y+cDnK9k*X=^S>mX=_=&kp z)l~NwOH%{NP`a)a@urz^Ap&;vb9*m4iGGbhux?4{ZF|h@3|<9?sjd?}K_c^37-h_s z>mvg?i@Kexi{m=n3a|ng5sqYJ)K$pu#LM@XLvH8&?PHFqBlT=I3rfJK1DJ%26JiIL zsfgN7sgFXAFM@G9f&F^`iZfsmU7Z;)7SKLMJMcwG{wjcMCyMPFd~Ldh)OCH0)fpOO zbPbW$M^YrkAj$HdIJtWAvZrgix)s&JO?vMHdQBG|F0#qX(E>k~tZ3bDIFiVF3!xAaH>8(yUVGPH~cR!XWagqQLu zcdU2{gs(63sQ*aWdjz3C=k+Gp@dR*WT$6N3sM}aSN)-qw#pA-y_1ko|kQ>=o1)K@$ z)6ARps|%IUk?q3M0tzO>>!XtD5RWH2o4P5@20U%LpOy_;TWCD`>!v*c`pMny%g8X}ZG-Ghj`pB3Ksowuk}uvMFrb+^OQn=x~r{ zcB?7OjJKrGVXy@|hyYh7O%E#}Wl-ex#lAF2Qn7UJTbCiu)Hdx;4uivpLJl3Oi^%7m zR95PcBQmtMyLc#vdrem>2qC8E31}m+|H13O#wT&Q*9@M-B8v7qF5Cw+QuD6@9}giF z_bv9*zJQ*Z*puNodM(E#m1;9sgWB+4Z^;dyZAwyP!rk>H#f*@XD^xa3#|PG*l2m+E z!}&S%{kId&qD<5PPv*NEJ|{_UeMw=2QrV3?&JGs_71rE!bBI?5Dyw=ELSv%D>0H`Z zUs432QSwe!LrjcOh~)2=CImA@^n9gp2k~JY)eV{s8SS7OSH-dRXP;p*DXkb zn{oSYhSAL6q@f?kbKSPJP2wC2Urux_Jh!V)$!B0gjpE!?;)YGe+xXwa(&(%&yKD+x z99#CvtC2?CC&t&XOK~XJPIi&b_v|wVX;=WHi2Bv}+Vf{Nk3sD^5G>sfgE`KJj91+0a8>z{lCSNmn2D3L#+5 
z5{O8;Nk!5dJb~Q9pppG4UaCE-`HqK)Ujo+>ZyC^Ry48qxo`#akgNY`l6Vf&sUfO#p zY<3>2qk3cUo>^qOFyg4|^>=WR;<`;LH=abPy1S-%gy2o2w-2>F?Z;4FBDZtP*xO#| z9K1^m4XRu589jE%PYY9}3!%(IX0v-yg2+h-P5vYfT~hHo$KSU}GMqMIj~j-h)yd6l z5#GWboVzcuyaqN3MD*Pvw>x*Bv@K{+L>rOxvA~$ZMId9SbwohkY3v6AE_r`rXD?Iz zu)cF6)^}H2>4yX+rW@@{WO)r0O<$%YiwKgVC#rnPF@sZ z5l197XN7E3H1DbXN|K`SQk;dGe}Y9llFJ&Jqwk5M!j?`@k`Wzm5?Qy+um=DvJ z{o_i^!%bSevqC=zeb&h&DQ@(zboj|C|8``HrIfX)imPD`E&EvdseOF|o+y3=BHv?e z8b!DVJ#%CXNQ{5Th}TaX?B zo({>A`8lTrdS##hLiQuLuUe5(^%F)Ziyr%ZckZ*%IVr+_)wGM5Fk@n;8ECF*4A_9N zd2W6KXwsXEy~G5(@TdU_`kfdNRU$#2RoUSH6u{fVssQM9jLT`>0zI|M%vFx z1sx$a*7W5{iHKyRs4t;Mn*4r4%%9K+0Wa>^c$5MAm!v4wrVPpj#*|%j=8>#r{weD- zKXEM(35ySuRScjVH=;^%)z!d&8~4;N+{abK<}o?CAn=Xh8*NilK?~$+mW4h2Z1Z2M zfbc9mGB0|~ZxBm_?^(G(oqyqXb!6;EA2|CBM0k$fWjgtB^`!#} zN0lTUxDNNBya*K}ifzLUUm6u8>T1KCt8yj->Ill^3ufVh0d-EX-xr)>kq8>pAnUQs z??9q|{6ssup(dbrFJ?b>Fjhz%8R6b4`OMoMkwqVLtv{>D#~{LlAkrQ~|1AI{ysAa7 z9%Dw33cxx**DHab!wLc9{VDS6lyy8w>9Vf2< zm>uPEvijnoad_h`fR0YiW6G|aP&~MY5dXU0Iowfijyt%c2+Rtv7>&)^v*URxJ|%Hu>!YGgbEv;C*Ajjm z;@r-~q8nGlI~gyc+`~ak>izviZfZoN&!(h1VoR~V2C27dm{39=v3Ib2jD7z}fm?-OJ7(eAI@; z?Q^iOfj>vuil&|DO1_3!6LIFmC3DJ>HOfwc$gsJKUiXduJA(9w>bd-=Kh@{Vg%8ze zdD9QD4?RoOgD9Ivkg=k1%uOev;}6b2RLGqQ*E!tsFPqh0TyxKw(MG*{Wi8Vho!-2& zmc3fq={5TH^6Ja+GrM$ux?tNt^kVWs%Ch$zUE8FOpN~#X3(Vku=ZS!k0b)4qj%6)O zz-Ps~Eha_b-(Lp|X~m*R;CMYrWy{-Clq3mZpZugTI=TCp{Z(x^r+g~&WD%noyIQ$~ zOCPeDCBEWzer;^R)F=9!)L+s9W4tfANhAeD$q1i!6xLMGg6bubI%j?G)5&z*U(WYg zB|1A7d?=dvq;cD*6jK6rb0lE1QM+EEGnoxGGzS+CZ4>w!f2OR)fec}Ll)fTNKo#m{ zMQ_g)0?Gy+)BY@3A=%2@nzk+GACuUrPO?ugZEjuM1lE!_xwGq7m>ee!ZaoJ@MYKka z9@MqgL?kex;$((6K4@td`jIc_7{X1OE%!FL)!nlll1o~@l($WO%HcXo%`{^XQo>%9 zXEvH$-3(?#-N<7{K!^o7p4RTJak36w2^+6WSYS-UC7nz-B~aST_Edt9t5e#-c)O|O z$6DTHs4aea>J7C+xp7D6@Gt7OogJampQJLERv3%ZhVK#+G6$SJlStE3nXiDEGm5a{ z@NUQsFRy0;X*yX{OX^=rJREzM3OO2Z6NwGI`f+$s?aZgHnd1_kKU$;MqMlXt9EdSh z%KJkvSwP&JqDzS+{pJ}Xf}P7gX}0o&8-&VmW3T>}3dhld6aiNnjds7O3X=+xrz7q+ z!zb>eiQ7?T;u3h$l{);sd6v9{=*`XnTA+mPi&7eOAFS#3adsYywkH zFmXt 
zR1O)g`hJPdQe7zbhHAtP+GFq6u|$DVFV;bO1#21yho6;VZmLwbp3t6P`@#ZQLa}E; z-N$-!m`=yX$;aIMHc`qRBJ@N{+!q=QqOrSCX2(Is!7LA%MG&!NFv~u6^m|1&^^#Lt zuAkF1s3L};pR~-VFy9!4eqRtI@|aYCg46B!8vp7gEtCe^!wFMo5%ow-t7Q5)&xOt* zuu^r)S&r3wJ{M7@`h1YgMLQy4%unlV6;Il|DpfgSiIY%C43rI9ks8PB~K1OHl zo7pmvGjJ=tA+riXeKQZLG`j7}VX|=umW3+(czyfmO`fX^7xze8Wzi(E`2tfudF$fG zmk-@d5W1|5SZHqePgeA+dzraB6IU3+!Z?C)m4ERct*}93=b~+OV&NjphJj~XIrD0J z?uQJ^JJKzCWlcCGd(I@~Mzl&_E< zd8GDJy-VA{t%_T7dA1(K+H6vWtQ_Tgh$EJ_Uv2XVg`!R=zMCy?-o3~x`JUuwwTDGh;@lnJ#qkxQF(07vsW3JF?k4-4b>v5(Q%7+g9K9-l8Ho zS3mFyCqgfy$t>BtMVSrlZu65-4y-OZLPIs!e|cTG7G?WC8=Tr>RTadVJi3-WEvh}qld?>{+& zMMqm&WEy6yT|Jz;*#F+DIMML*0p28HZ%jw4#UrC>Z@ZpVwSC5vw0-Kw5`0YOE-jY)<<&~sM-xU#+f=ok(BYbYnksI$rme#VXZmAOfB0$G&P#kh3HEN?c+#! z=-bH>iQ5E8njFhK1FfC25T-}N@-FyrpS-p>JwyyGFf#u6;A|P5k7kiZB!hqI(4p6s zhS>HXy9xeW&MHOh^@u<%)juI!0F0M9+&bc%(9pP)(H5%+d9)eK@CwG1)dmmdi!BI16I- zO14mb3=w!k>EWdjJ>cMs48_>!Q6>T}$n&{pg9q|2;fPHi@qFf)UF2+Du16R@lM!p} zR>?&@4OewV5N5RADu%z5lVC9WcRViMlMtL;!eCHMDp{PQI3+9c7G9`vIh98LHC38# zHee&-W-cIRZ1)#Ewz6M~SZ-R(n&RHrPmIre#~3mvOvKLIcVm=iOoilV@2&|s>Y0%! z4KWTrL`fbqZEABc2iy^I@5J6gcGaVlI7XCw19TE1OuwwF3{piwYUb(0;-$Xa4cF+$ z8YC%Ls^|-P;5jQR>bQU54offl)HIqAhS6sG8V}V7HkusqwTMfbIV7p&e0~suh<=qEcf!QHt<%Zdux?*fANp<3cmws3+2!qZNdtYf+@Tbt(OxMFt z9`8}b*KY5}in)01Acn#*^C1Xv+ncSXBBXUJPI&||#obdRSKD=uh7 zrASwdg~mv`VH;#@CS(=DY8K;b%Y{kxreBjFo{7HXME*V#h2aE~8V15M|94wAi}4h? zRxb7FoCLA$8SfU1pqkkH2_+35PUj$2_L#w9@se-%SA5 zv;l7mf|x-9Bo)#|PV9@r9h;EqB=4ai2-{s=ez)djX~JO3egnok{gvHlelr=H z{;y#lBGliFxWZ+UHT2KZd?WfU=-54e&QZ?ngF2MFY4-6=h*OYBh@KXXvc{=D7p6YC{_v%7=tdQ! 
ze{|jQM&B6u6W(3)SG$RK?YuX+jXJj-MzWP{$!Ud0x6Y>e)V)Kiw~xRX$|IPR0p*}; zX9xpnD(s932Fn#4xfB}y#)N;1HDUiQdV*!N=X(r@*8?A<|4yv*j#bl_(lAcN?uQ0k zTGYlW+8vxF=z2RKHt6|l>q75_#@N$0LVuGGWvQ$f_bFiq&MMP%Bg3x2cC;^Z1K1W- zz$B9|Z_c%Tqp-P4{%M(JFh}LrK}_2h32GtSlyOKmt^Bt^5l?hfvliwHa_xo_Ze9w` zsD|?2KmLJUo07Cb|G1=$sky=bUoJx{kv1VYduCN(DJfA!KyY40n_r2ZaO!H2&FlH5M z=CGtxC%O`K;Dm2UO}GQj$2=PR-=@Ri(ON5XhwPfUmOLAROL}^c;iaNkbR49*+;4|@ z1hlCqJ&}IpgR|}Ja#nbV6Q)vwiIz0}&4rCoZE1ERm|okPmd~GA&dkf(S}A!2-Wosb zFKgRsiBd4i8r85zK5V((YMN!I%vAYLhylr;YgyG?RWTm1U*#;o=2lGwk`t>zWYWs7 zAdfGV`c|ZKAKPto=uh>nm}Y8LI8dT%`QPL?B>i&HNqddhqj)9_tjpu+#Qlaw-D7{9 zlT~Y|=5n_+eA;D_XKxV-#YE2B8QNUgY9m((Ud(9IKkJjyc4yI8E!M{qCT3qd*6_*$ z(0b9fupL`EDq0;Ie-!jJq!eTLY_Fr-9Q_ESt5?e`wvG1TNH- zfm52vv)8HxO^5W%xXo{Eg4cF1Ex_W(7ca>UyG;xJ0pqP1G<%O;?BC29lRt23ph0V8 z9fCzQF`dULT~8h8D=E+j4|6vtMJkEW1C0|TeR@+rdG65EOG~McV{g^t<4P*oqR&3F z9TeN=G);JduOR4LZ710LLD4Xbf}UBV{VU_R+U2%4L+}%{g-tCPXF_C`?YXDCH_>V{ z$>h3xXenFfA2{6Y3fgZ{4KGoImM8b1#`&Roi6qsFD^{iW{p zH}~-Kxd_8W=gDKa@U#-|bX?WZS&p%J-f1V^EnT>y&Mo(Hid+O*ciGug_02tFgc~f# z!dK^>fyK5052lK+h~8-@#fdlUfk@vCx}rBfT57l4!1#9tY)2%fCpyey{B%~8L>W}Pf;(9M}aEP}fl zl>ww^f*eT7rZFc6%lf6#~i!G3xmH~mFE`kVdq7wHM405^36>H#0fPOpIm z2vU@-GlJa22?Qy@OyR~7wr)T@4n&;*7cY>9kJsSe_)!o1roZ`22pSet9p(ho10jfD z2n#SbCj(5741ya46cz@_fPtJ3oxDbHoVb9T54^BHL@SWAo0F`%(*q?;Sooo5gmo)h z2X#AR=pO{+_*zy1$n&@Qzo=Oc(1U3`&?`wv31df?DUkO;%iwRa*Ms!;=ZZhDt85;O zfTUn9a477dQ;;%(zy~* zAIagg%f+#?aAG`>?}`P`@k(jfNvXvZp@W&%{qiN_YUm<@k<@DTACpVX+S?I5O@3wi zfqbpa{b(#>jpoY7GP`|Z#r(*7{v#mvJ`klxQ5(Y#J6D_N>rP)-bfh^14GH@gfNTX= zx2vm@Ai0)#l=jGLG0G~9@S3#!BQEgfj%7wHsvGb-JF!KFjuW7o8nttJy`)c0I>axc zmC4YgGL+iLtu)RdKTf9;0M<-!JZ07Mx=e%N37mn0gvm!7$G^djHE#MADXrR-pkeB_on>FNK%De#P_+|EOM|D#xHfTIEIDl=#9k4DpWOO_Y(jJ{4 z9Kq`wAJt_ipf3(1XVg~@iB2Gl{5mnAv=iAk%7M!l{p=g(Vj8&)?5hR z{tO+1fB+G-9nQGk_S7G}3}1ix#%lhee0f=40V&(^IcWFjHXGMh(#UJ#b;rulma*Ge z0)M;{r=%gDjhRQ0)ThDmZ+17>+u5PlG-$t^sY&qf={v$eiPBOdMT$Vh4Pa)$2V!`dzL-4n#_E?vz8Q<0N=toz 
zHxcm5TdP^|*B}YKZwypc3fxSfHvS|1UiT3<5Fq^$dGF=ZU1Y+p$BkY1%>cZ0G9J0iK4-lpjROcLg?m%8UGADoNIyXNWyZ;E}@iNkD4%{ri z=f{|%FP{Vj4?ZD&9qI&>9Hgub){cF;iHstNuS%JS&LAlv_lWrAe$ML>%CeBXT(e!h zU1leWj{qFWrW`d3f?Hw_v}YU`Mcum9_*N)&5EXv(m>|;bp<^&V4SPUSSCSpF*OS8_ zmEM$fd~HlI|6+WCo0gd{3ZpVePJ+dlEjNBrhF_ss!A+c=;37=zEPB!_0Vr(4FP^Af7SqXWUvA zhclFune)^0r!<Wz&NB7aAFr2f6Rd+9mITE(>tq3@FLw)Jg6Xj6*{(dlTZuOH9u> zi=23ze>yXKy!aUSk;|jsL(L=DqqD`WB$(+eWz?xjR9OHa8JRO!2y!L!LX44RdF(T+NScz-J+G7cuw~kMN zvz+rOICP9^6lJ@0ER=1MM43b)5;gLvBEd{TTCWu>Mt7PmkPVBil{H(3T)R)tx^A!O z*23EMz0Hb+_dtjVp@q7|4D_u0LmJycWB=u6{Zsw5mhv5!32z&7n}Mb;P}iBPw=^c5 z-5m?R0t52{2Xn`ar>D9&r)uU;P$T;>q1I;aT_uFv1wOb4ZlBLDb?f}_TD&18EmBBV zh#bjx??1afqu7aV*=upo-!~|92)EP^9M`la_?DXHT!$xCZG+dI`_}u${^tGt z6iF9p*>Bsg^yNTTlE1fjK@N)eoH$;VA5~$Aor># zsy8{vtR*+WW6B?|)ftqGHx!p8(N(p{{LrM1#r>#iR(O{Ag1()>3PVtX8O1 z&?VPjGJvTzH0*gh%U{SvYbkktFgqyvywK|2TJ_^(aYnQFjh60}O%>#5%%k&m95p{A z!aLPl;EtMChVRmM?Dk{JtNhaX@4Y@>eZKpLA$yO-jE%9Sv%x;tl@~alLVCJK*4%t& zR{E{VZHt>ys2_#Whf@1yTpwKYiidi>tB$GnHorEoMr+6~Al%bm6_`t!W;lr1rxvAf zt8W@8EZ*=OIl7{m5z%zi-*Q{R=n2mw%vRpc&J;~@-uwQ2a|YV3;Bk~0F5>tB)Mk5Z zHMw-Q<2CXA`_&II_tlI08mw{B7(-KoA0IdEN*%j=ayHQJXarHIP=!J%LqCFq3=YY& z_`JSF+$GIuwO3`elGq(g58lUp-B(+%U6pI5Z69jA+10%Lu+!vdGqJ4WNb23RnKj)W zahHDe)Q9_LjlmvJM08I?@EFn^CQVA+5Mi?8R^WyV1=h*G{*4FbA;^&oBM+`QN2z4{AYTQY-TjZKTi~BzDCR%z97x+B z=>2dT#HE9Wu=QV(7TiYM#u5HEfC-XYYcUM2z8-w2dJ~PnH9_p2y%j3nIa%-W2l3z z4G?7F0DDkGv^0i75qB3bL8fqoDMz>?5X5d{>x3X#06`9a1_?zZ0ajMV|At#Q+n5_W zI9pp8J39eEw&u1rFiV6r<3Bw+K1A*Y|90u(PuTx=ApX~_i$6&LNZr}Q>CYvM2N02M zC>vWNhJ;8I{>;l?I&v^87nlOR=AR!1y*U%v_Fmxzg(hbrMLkTm)(B0DAEe+Bj9g1{HBP|Wm-QVCF z_ulvZ|KD2Qto3l_?0ybwoxS&&-=SBLlxE{(=f`iPf zEWr){Wk08qum9O3}rLI|h<{!4-HeTlI$EDiQi|o5gMt8>tYt!%9A5M0R?$4?X?hje?z1Y0(PgyE=mfHGPa=PUE z=OhM`dctD!^MZ+icgK@FB9p;of{lkCU5hY{`uawwg@15gO0(a#h)e`ltbKaEeQ zatIRHoRiq*B*v<5SoPpf`$aXd4+$wFJ1xTx6MSOqy!lzK+})Yx*2n~|%7g=WucVc{ zq8Z<5V?QY*3IRB7NPXXuqDcrQCPdFALa)8ip$j*p8$E84C2p0KodfIAg=ga7wk8z> 
zjE4o(dKxtiY)d`0F%lfymJVn&X(c0TAb1wW9787{9U!MXoH)vsMG~_0O~0^cr`pt-uYTm&?%5`_eSxRcwQDCc zowFIGenh*t=!pKtXj(N2JOLAw@La$dbbmN_zkr345Ka;#2^=nQ4!^oFCj> zZx$X}+m;`e+FEb-!+MNbuDZPM_s@2{TRrY}$;_uZai9HJ@6DJ0t?hl1*8UJ&kyj$( zbwTBQdmcRC3wo4onr(FIp zo!A#NRAxJY5(UvtN@2V-jnbECcnPJjnWZJ_7G1k{}f)C+%JDPvvO! zNHH9DT2izL-LDjS?Y|u*kWEj#o?CDKtW=O6ywp8P)wRHLOl*|_MeV@`T|C!U|nZ?*2N)NnZG%LuMTEhl2{B&GwJ&4AB6 zgxPj3Gonp5`K;4z~iGNk!8d1R3KR_XS9SIf=P0Fd@`NEc;UnH)z3C$DLQ(CeneD^DqF2r`#_9PZ9r>Y#JH^4pD ziGnQ37t$AUbI);Hb9K?K8}0ivrhc*%W{i1``f8q0A8_wPmySzBN2o*w>X?c%8mKi# z-eu$1+(e6PG)r!qvo{9_usIr&V;A1!{0W0@V_GF2JH{e;dLygo+8>!7iUs79gS`1c}sxP5WOZ^O_7;Mxl zzf-F;HEN=jqQZY^+yUljhIJi_lufn6FHL%KQjF=-#PB%hmY--<8x?e;BUi>UnwEFR z4;yO=7~+5V+y-K=7{>8<$AH-^ZjjBU{Qo3UmtAg*8OXFzm<~YwE zxAsR9Yj{m_?Uzb+JRqIo3)ZqDv21VkAX<(^6slIWJo1j>m~uhH=0rR{h?rH+v_fS+t)F}pZ(I(rZ>31el9c_7+5z> zOHWXLKk&wJFw$%3_u0!g?_Jx~nsAyVGq%|eYTNqM-G3M9>iwv~Oqg5#%EqIIi6*kG z7|1iwdXB;ew?3SadiGl3;&x!kW47J(W_9S=ycEErE&9;reU;XJZKcWe@-$c+4_}#q z4@qI_lzU2_LhNmnVow?$~SF z>y@%ht?zohC=#6zSx{##_{T9j)yHpr+xZOXbmoS?Y+c-%$AiFuHH$L`+`nB+w$oz| z>VCVd&^AeLV{=6ku!E|iR)0mtox`i^{#1`-MsQB!DFLnAbw8HA>E4$5UBnm`KCVs^ z3nMVhfOr-+AB=6?VnpYN;=;$no<+`t$#ritKl$?hFV|8i z#S|etdY<2d81nW?5lpG4g=>GOr*wSu;f4w}g;yM#JroQn0dX@4iW?0E+N?U>Hu1%L zNkQ1>zgX0vK#FtcFQD!7F@AI-UC8&nd^_sK>V8BPCGqrvBsAtu+wKoBp4}6|4tUdx zm#PNl7hz6o!gCPl0sr!#kWJ2u0v^B+*Xv%$Bc^Iz-^GShXZFDjyr??80-Anh&Ai32ndoA+(|5}qV~S&T z>Z9~u*vQA>$dZeEng%O>=$sQ+wMo^5OwwYdH}5(k4q^n&YIBc|i$oV~eS8dkr^qdm zi{Wz%(rOxWF;Urx#9M+6Invp)>bH^PhhsF<^({sH<%C$hWT%%6N7X%{E%F-fBVWm) z%`J506|A^eK6tVItLkIIWSo!ZBXt~QVYxGGbjkte$Bb>g-IBI3qov_<`|*AtnqKCEuAxGMK#{>LXvqh^#t=u zDWsar2ZgfzH35W1wdk%ua48+Wp5B0Bn6N$OBLz4}tpD9libU*0&!kPzQKUZvy8fC^ zq$#(dW83bXSFXl@O-jBH6Z_6UKf&_L?^>VNmWPEM_fei$!B737+AuA4agdU+@OTSj zu{xYAi&3jM*21&XLyGhUn)8{dHX7bXb(dj9b zV~9^5A+7!uw46O9h(1jFDv=<;l)I;b#D$2wBf=hqgY}oBe_=^frKIHm2tlRtC~mr( zAq5<9;5|3i;DF3RJ2{^qfix6pM?J|#KqOut{Drm^>-lF$g zHwdwX4A_PEH+^BwIi=zJZmaMiq|*)^u&WmQGwjFnI?fPfqK`F6$7=FJkwnF(N3Uku 
z#?`=qNzo}ntUQL7%X`;K@(~1vPmKuWhYSb$;_aW$q(`tlrW_1cU97@oqTE8sN$_$s zBB?}gmUj~t5}}hL6OH?hhe--3{hhu+Rr=fgd2)W`ZeJmTRy4ScqZ=AB-5*|qu`lBbae0BavaLOJT z`N%pouDWr>)i3kiKB4Azv$pDwfHyXoykbjf+eh%#1{zPb$HNlr-qz%!x32+=r+j|2 zm3K{P_-w>5LR})Q&?|>0&QXx^}|*S zZk1Okrxmg_U?m?AM<+?Dt%=%8z74m7Oh8T#v^0Q^NU#!6@ z7z*cjTa9f!>0UW*r|xZ(0%AX2Q;eXO^8di{nd0-f=NQ_7#N*zrTC@^m5YrvdR2tSY<~gQ` z{3^&J;|XRR=);R0uAL7o-8q5_jAex1m|@(z{KOf7+u|7}Z>rhlPP8fDliAtSM&{8f za~f=~N7iS)cIdF?-sCw?hG1QJUT4)OYw{&b7X7R&_d~c-JUEeP8@lm~^XiVkW#kmh zF~9aET|mp&3YrRT8UK>2QMdnVhBv(PZI9C^Z=!@3$y|^ZuN3c`YH3NE^W?#c>rv5E zYVX00REE3xQ-RZ97zkTH5TUP-w0*3U8!2&3(!~6XFPkDb92u^P$fmTJ$Rm!A9hXx` z$C5FGut|+5AJHzAtx_8mdhL24H7_+(ONhL(AQr@K)`spV?l_NwO)=!KjYo?HjCJ&B zp0FMfUE{p$(6}ID@cF6V%q8f96g2G{Wd%YiO-}lBh855N%KWfF^fXy|c@)hTNvngOZB#1v)>yj>2`}5B>=0nM*DYC(OF3~DO+!PU=K%^E0RBpnP7_lnM?u9jENjdvzLqNbuo6rG#MKC4s^ z!C|fnMmI+D&T&S1GWe@*;e$S>s9)Czk^+HDU;HI1Vy}v5|T+4*dQ6dniSqSMeMvr*5leUf~1vpk?d5(7n*bz#p%Gq_p-{Z((b#zIia{i2Fx zNNp&P5z1&((osLLQRtdzYA6asi#5Rk|1b#@E%Eu$*KyB5AkTvw{%CORV4MZ!BZ-VP zXY!61R{ED@UV=M)UwVo2(aF@kt6{-zV} zX_Srz%PMSjCX>=H_>XVgUQ`8n!cmTM8eE!g43vMQp&1d7G9s-O)Zur8!WfDb{-zfC z?A>iL&5zrr`bQCe1Xe?n91xHDaKqSpArkFWH=7r$q2o{mL=K5~oomKbj>sXa{VuU2 z+K3#|O<;YF*q)9s-kR!Hg&S`B7bNH&Y$~`jfU8X3gt_Wxj#$f+5{?LrL=Mq_do``> zk@r)2jL8}gSG@2T5XX~IdUesn$B3iIUo+pbY#!)HhN6*wOQ8?FcNb#~CwyV&Eyfg{ zurPp0V^Yhib}>)&91Ov&zE>hw&@H%>6^2}#3}|axV?NkydYBH^EidwxVYZ~_se`J& z)$!|_M^(<3Dwz7isO#e@j%xg2E46X3s;c37I@36qUBz(yE7Q0N&N6=(dN1nIZ_SK@ z?H&druqxV8Y?NYKcM5h}K4qH6Sr&zdyfE(-DV5-5qLcM8&$+`P+xE=2prs2)$nHB#v3u#**4N{CT)-jCA1kF!LGWM@(+5o3LZz8_N`u9mHM{O z@fXX-agN|2(e{tVb!5CjDhx3?(Z=OJstL`_6rb*?55-25daY=lRk22FYAQlF4WmPfc&S7tPO^o12r>v2f0zr?FUo zE>uwcb3E#cgktOErsNbOx+B3VyW&mHBKQ_^Bo*<4y$abgJE4uh2rqZlA?%L#LG6dx z1!w|h(Ag__$`n0pt?gMOygQwgwE#y`!;#O8Ex6Ie_2SKzHP3|-%zDd=S15buX@~=j zKAS_C)86SolO%dXs8VULQuM?p1=)aMEdz_ENf8YLhO{;$>NE}|he>xIQuJyK%+Nwp zaMsj?9popRT&EJm=lU!xH8vF^Lm=L&b?8~&P0b>-sNQYOb^8N{1(VsuD1c%I2QK$* zB16OluMhfCOS*dB_FBC?Wza9=%Qu65OQz%+N{;c24&Df~H=#A(NK7tvJmCgQpNsJ} 
zU!_#W$ZrrT&=h1=DX<}qyr|rcsE(z9DSDsD!ONHXDeuGg;}$hTIM*T!LG{Z+5~-Hm z2nuuuS7>eSG`en(E&blIfP~A<6#9)B3Z`H58R zBIC%D%C%TzTbvSN-|*Jnqd`?oqO4?9wIyTcd`i+kQPfT$l|+^>za}dBb%~ja;m@$~ zA+yp`G(o}iRvZaQgI;!2d|xHO|Ne7QXSfjBJJVp6ZUr@>@98NKCRsY9w0$#?+Eqrj zV&wF!9?w(~6sSazd=g!z)3$iC`?=X`Npp;fqe%pIQ%t`cC4sq$Sc+C{lTbu8`)>oq zgjAz&zT{#RVZ8t7RAeQmXXRK6z8%9VseW76h7Y;Arvr0xa!` zOeyBEoaegff5lYeqS;ge8BWL{sidc9JxhP>C6|h67k$48;)n~MttS5~XBMS{@M_tk z0P3D^b`(!Il8NPHKPwGSo~K+z(Cl;(ik`!!LB zCu5c(yP3_=yoqc`LmpvG!3*f6WJp*4k)3N-f%}8|c<382dlgrYId`%PWx2~cX2Of# zxt?ssu1&u+#j7@E0r806oi^oY6`$m4hm8Hv12W{CvOHORU_mq#xIFF$(Hw3GqkL(2 zCyFA9->ZmpbXYGEH$J=QHdSSlkQyaIvCRE%L`YWyqYnKlx^-5FFNi#NyD<-rLJqoh zdiN`9bY3lSjOib}`R-hDjnnOh(-s>S)v(X4NFdGb@nVO}BkYLGgP|(30E?FBZHp-3 z>)2S#_OFu@AJ#sYy*F|x_}#RiVUp6Y?D$io>Z0?a#QQdDx;o3K?Q%9PtF_VK?$hZH z{y-`6&?WT+WoL~fVml^SU3+0uR=~W@0Y#mb8#$Wbokzoz(63MxcYzu-Gk0n%kEK~x zK1R3x!9QflGZ3xAaf+1BPnGU`8A;oStX$-9bf*eCN>`hC?oKsgKaUh}1}ne5-74&E z@jkhx88{5f(!WCdRN(A@<;`w$x3HUKkgb_CK0~zB;(DX~-TUMutK92u%)3=cbE0sn zVw|G-O^#7dnzZ})cRo+|tBoP5HMYDv@B8~bXV`sb*|YZNdV|WFQP&Fn&Zbd*K`Tsd z-e#G|ZM-I@TJk|KhIG~6 z)1lm~1>>1o`rRqhCQ(kJaTcd8(Sh3M#PVA<)ccbaIHi@_d>i0JWx1IaDQt4Xr<^aA zhUcp3bGVYr=3?cE7Z^Q@+NTk}7)&!?(&NoLgfbLKo8d7r#qY$PsRywV zrctg0lxw_ry})RL{yNA*7eul0yR~KyZa;k4W&m$ezOGb|VfP+Xkb$gLz3H~rwOFgh z;E6`m?oNE9#{&k%rcIuzWqpHhQ*>()=PvAGcVY&MmWr|+TS>$v46dTzpD^ji@B2rJ zd7>I1w#*bHhR(01ot#a|f*#94oYTh7KW;TGyf@@WFP z$t40S2)WC2%P<=`QnP_lPPxTg zD#0A-$#wGqRm8b`Zj)if0H?K8Y;~LkIeMA3082Yg8)FAT$E)i2Cr$;E1VIQ;X8q#Z zlGEM^L;&@!yCav%_Ge-0fJ74b5cU%5o_E2{#x;=EN;o9L%}#8sdB&a_aR_0<@Z!c8 zild#Q6B@ax!KBr%$)vR-KoAO9vLy(mpF{kKyNaJ!btMQzZ^GzOb85iovT`H{br5+U z_+gvh@Qe7-F_lV=%T6dxsR}FI23LuSu$Z(kr!qLR#_{qd&=h?(F z!X@=2+fDVP-lnt zqi1C3LF5cmZ3_$IR`Em5@smGZ)>sR9zB<#bz2TOlew$SB zc|G8AqCft9Ls(m>!d1=3Pi=Q%51kWp$_#;*3Z!uqee#cAg{E>8h$b9q|ScDlL_^iIEbR05yL(OYCk#hW8 zY0_3Rn}0+W?UY+k6?*b;vOJ;KzP43WlqT?pB;{;enLT|oDf_&0?7`}aWudx9bx{ro z`!4T_$;6h1>Y{gB6ib_4Yub9;Xqy>yf#E>V#tK0vST3w3n?M%%$ 
z`Y!V2sM=)dx4_*&xA!>-@19GKin_KOnwV0Vs+vm@@!F%$W665rnQuHnG~jH;@ZO3i zc_}N4$qc8MAL&c7^1YT}qmc0q!z5=n^rk#o%S>HlL{+C$f9z7@lhdHOsVi@InvU_i)|;OHUcm2CZi66qfir$W4* zeGQk3wVv2UA-hyPQz0kD*(Bi+kP{5efI(S+X><+ZgYg)Sk60eraA} zg%;vd+TQ0GJ=kcIM?zF$_bOX0mKJY7Ue+>`=PpXBo6zy*89wsYFf=#dL33P!^S;_5 zU%ExJKjr07@ov)}rJuZ*t5652!55pxuB|U$;h<7AMlo-Y)XEM*yqet(W$zaV!-`$c#{ z(9ynaF7Lz2p4)5R)yzPj-TiphXC8;+W7u;x-1G>+s7%l|XGY}|j!Vn^iLZ0UAkdxf z0+hp`IJb784{_qz`5EyA6+aoO=!ER&n8(^2cO=8l*U}n^vpdepI(KGCm~fZ=cIaIr z;(1f3T;ua`x=`1nZg)I=u#kU$^URWA>zX@k%*iu#a_5?R)2sIF5KX1g7v+=VgYm-F zYj3m2gEa&I>7Q7he?pNSkw*VO0{sh$^bZ6QP~Oti0iXwXL>{RD3=jwbMIbmn}iY!siw2TB%+8hj}Em2)~ zeavbdSuj8FoN7XfzV%1>uAqbNgO#K6YGt$QQ$(1#G1?QXLnLHtq-EQh8gar)>8B}A zJ!ZnKQ}8c|TAQ!|S2rxxCv`-X$MsWlromI>s?w(5$sEZb*vI0n=i9Kj#J=^7q<&yGh| z_=$u;Up*)y4nJ&VWJGZ@tZR@1n=gX)H|IMBiZ-d*k*bApb!vODn5=pAd&U0q1(3Sg>tGv80wjaoCo0W%!9&JCq|oX% zM{G|!$|es(wI9D(&0m+!&+E%S$@<6)+&Z|d04OBZ>Gk4&LP-%161~hyy~OFf z&zo&b4Ec45JDRBTG@qJO)Qb{{i3Jyc?r!>e^uQCdTL$@RDC}oSa>mVw-@T`HgTj3; zA)R+5SVj`mfTueAC-wIIQ*Ho~lqm9!DA^V=e#f(~9k>lhILpXS#h5-lvoq#g_Inlm zL=r&s%BL=yINq zO9HiEI6$J8ygE=PnrsajMFLlaJpMVogt+We0@2-UwIcG8pq(7EExj!!IB65o3yJz{ zRSUdp0yi{T4)lU=?<;YwQEH5p`JYDy5Pj=A1o2a``_*?O*dlwp+z(LhOj^d(!4UB+ z#PxZJEBHB#1DEi-w0MjQ9Xc*`X*6EmQ%lib(Uf^qjiItJg}r*OP^F|eRTSbe`>6;5 ze}7KQ(aPb?Em1{N$tZd|p*g{1g>iGcoI<@dIXw%WG%4J0jii!Tqtak@?p2aX_@(jGWo&qX4$ zsG0D?(aQs5#aT?)a$-lN`Q;nrUBu|{&ORyqu>I`Q5Tqj~EGeu^PTxkaDr-i^%j`=> zZ9~1sm_f5Z(+Y^|lQzY%7uHo+6o)coQ6t)9)od0WmS;?due4rIFu_^$Rb2``X>)6w zYUqDSuF%PGk+hRND#kPHlMN~Rq53{wBNwi*9W+)8(wvnR&=8Ptt+5X~Mp>76#oG1h zx5SESDANs-ITKhTphOv>-Z zPt+GX7cY&AS|(ctd|b2o^OL{y2##$NIY7Rj9xD*MIX@M^GsI2Ht!=SCPClAGH8yoT z-al3{CYXDskygH!lTp$taT9Q!hppM8D3e_yv6(jv4#Mb+iy1C5J>@Kb^Elo)(l?zo z`8RR7b-Ss$1-k9IU7Vs{v)<9&QCzoPtD_2{Qlp}vmZ8?aI0|CzeAQ_XbQe?+5gzd& zoR`><Fm(b2e~}boakO#_`0-l!@yJC@eNF*%TWwH=Hu0slcw7wl+V1 zcg_s^F*nlu!+kXPH~zZ%p2L#%2;Xe|q;vnsA}nx;*}K*|`hoX>?8*Bl^FA9s#iBhO z3BI0UdD$q4({dcYAwQ{aZ>Vi$jeF0dn@?VBVs1JI`i=UG?&Vy35ARG2^rVVJ$HA;X 
z_a-*MSAF@j-KWX8&o2?Bf~J*bfaX9lMN;mwTP{H^{^zt@qaK%@_#Rns*$3@z!_e_h z_&*$bz^l2dX}=4Ju36_BZH2$R&Ea3UrdgG84P$4W(n)W-yu|CGPp6l``x(L+!WMEM zkdux&d^+ZnUngJKV7<@2kDrMKLxaU0^aD4Q=^7al7~i~eQe4awPWhdw7IUo8L8yV5 zS$JJImEWd>sW_K)&isa<^$l`TOtPJZEN%h%4XBRm1Qc8|SQA>4H;|sRr_W1|fyV=p zm<_9L?&6*uneh1`S#1bPT}@PvJ7M4xypYYb&*Xp^H_jd_4lo{jp0peKzW-+MQ!iVu z@~bgCf3yEubF?JC)J=Q0g8luQwr<#vZ(Ts0fX|s<2spcrFTOH8AGJTOrLP>vGR7|v0>%%JLtL5L5=+7EJRO;$> zJRRSi$wp`^x|f;l75tiRc5SZsb2KxqRj8@`{sLNId@$tJem#tu8x-o9>?v?V$t%rw z?mcwf)F_u*T)W-rwc@qy`w7`|C~|0sEtL&YW?P!)cx?Rr+rW~G_xM7$bt$Z{K8f;a zFkLXEciQFNS*KXAdsJm)t*7~=fel(+ZXW)Q{-VHS!WjKto@V-d-CAx5~R6q=&q5C(?|2k@EGJ8hB*GsC17Rd0u7wq2Wt=45Al zuEv9^gV!R-URzhhr$_sR;mTQF$QJ=8H~*{Z7Wj7=it*1208&r{*4`3|xB>83w*J>l z+Y%}Ub+G&w0Rt#$X=VmNp#C3W{dxdy9-xgS)X@PTzzeiyD|dtcb_3#X+5fLX{2x~!{x$_bb;l3zzc(TtiHL4P$;1W`5~5M~yDtBb zk%d@0LEx5P6Aa+L)&=mmqW}HI#NTG(|G@#G=zm=J|8T%24zo7>zj$%6hnQh-060My z9RL0RczJlZcmQUAf73vS#^&(@fc~4t$%|;b{wGaM1i zj|b7H{ZASPh!b(ok z>XvR0#2NsAa!@lEqDB1M(5b;-aKPh5m%slN2e^qn{O@XUf&@7DG3e=~l%+BL4+A*r Ai2wiq literal 0 HcmV?d00001 diff --git a/doc/performance/Radeon_M370X/SGEMV.pdf b/doc/performance/Radeon_M370X/SGEMV.pdf new file mode 100644 index 0000000000000000000000000000000000000000..fa661249c9fe262d783eab406b639c145440d5b5 GIT binary patch literal 13701 zcmb8W1yq|sw=Nu9TU=WpSaE{86n7|6+(U3H1T9+J-3t`=;zf!UC~n1yySo&Z8~VvP z=R5y@*ScAYmCQ4H&z^T!*)n-H?K??n7Is!HbXxy4|8@US{|S93bao&c(81IiT}TKW zB;f=xfm%7(OPW9-Kzd054mNfkHcoa9c5V)Cb`Az~kdlMh|L>fJ71S1{r3RF7GI6xD z0y_g$T%e9FP?*{~CkHbZFvRJ3mXno(l@o^jwdL>(F`$cxphN7v4A)OIbi~7!2eR<{a*^ta?jFE5PRr9#X%Ad_E6Z+8OZln9i#?v zc5rb5!`9>akM*7f{&gIR5PJ)#C6J$ogAWLjwz7r7o&Y3m3p*T12-v|4^1N?nDD2Ga z(A_`Rdn?t_HhkE8d08f3SdR&ii` z+CL){Q)&0S67{(o-sD{tp%l=+eDb-=c#`%#{B!n{<@405(0G4>4{FbQdicZOH1%|^ zvF4Jmz`B|~uz5peUU8HDYqQbo(eJ@0Jb85z)j$L7mO$VCB|U_PIc7OZw5w zD~=~u-(|0&(Wi&+SVB+VkC#Or_S)ZAj~_pJt-50Di_R6#NM9*TZTZ|B-JHF@zCBxn z?RQo1>ou5Gy_aQ!r^vVIX4WY6w$t>cBEL1Z#;d+5{ zdWc6K?QyIfa2n6sq>5Xm##)P-rHT)6&?-`de;lxUI%>5WhFlW*e_SRVdLDY)vzT8J 
zid`*zBjo%T@#W*_ybbp}^@woi$6%R*){(opWe=3{%{j=4^;uBCoX8|ZRJ&2eD@AXW zbYd}x{oQn{qr>{q4|aS^RsL@_183e(>kmQdErffXCMyZ+Kdih?Bpw`36`!(-*0`6K zEpe2VlzZ-4+GnY_JsIv2icqpXlL{9;%rS&Gxt&T@i2NvVx4Yui7yllkeu{JK&?tFG zzW=y!^GK&Mxkr6GGs3^Pyo_7#DeRJ_-;3}f#8uQKO=#fZMauY*x9?d^*8NwHKDr-{ z%!t>-@6YQ`51$^(p62Gd@?%FIGaT7!>q z^ADE|PbZ^K=Vyw%WUNLH^QY%APupKUKFnu>h>s4;kF5vn1X6ClTq~{%40D(_fd#bY zxqlTI?YNjWPlumv4jTr{c8l;BmT3Jqh##P!fb9K}VfWnAObqs%?x7r&|A3Re>NrgdnB(`VYHbN$SHyJ$u_ z%dQ27IkEYswL8>s0Otb8B}VU0>d1zP65KBLV04^ViqZ%!?Z&x?;o>*NN80lmPTQj= zE{E$P3&Qv#m9GB*F^cXYOX1L6pu_P~8Fu_->09A(Fh;%1k9(f++N#BIMfk}Vb~=s$ z@LijvJEhbnjVq|P7lKp*>9}m4uSG0z^^4&DAckss`q;-vj{d@CA5#qdEz&lx91s)p z#@jy_nyxspa(^#5VR1`5DU_`9)vjaGnET1;=3L#T!}o%^rMigRorh}6$?wt1X8$L}jI$jK{tX2cLQa>TOU$Or6qc+#YLHwupzyo}L;DO?sP z2OdSMgdPmpU{_-`yz;zWJ+*cwvs~|A6g~HbW=)lOHyVuZ{uO>JxGr#E$2}?b5F=1H z7cx&aG1Ij^B~2DKmuAQz!kAtqbWS&wHybj0?WhONSWg*tc_#=))6Q#|54>U<7Fqau z#;WD@$1S5c)2c^xr`pOrPvC?+eKur1NR;?_Gj9d49-(w|)Z-t&j2j$+XD5xU&GpuX zw**MrX}lH|#_<*bQ7zUK_FB91Ol_VcuOFtzKhOLfJd41wuuiBeu%)N88v&;CDBGM+FyjGNwA9)gH10u$*YI`?iLy5oOE<`9h$%I1}81K$56 zJ)d>LDmb$-GVWOdd*IF8BWkkbrU>iV|5%aily3>~coEK1wU@LloBaJP2;-&Zns9Q% z7yc1rq(Bg+X8e$LN3JpLW7h+^q@eb2Ao*R(FNzDeqALHSYpg324R2&7_-J}#TC_Os zFHG8DPmb;I9mtwOqqtRTiD&|X4TYbQZeel`qevnMGv4256mm?y^pceaFQwd`!c|z{ zoWRj**kj3k$8YwEeyO=9nw-7m&?$ zTJ=GSdr{6<@4R{$i-;q#fKOIF>D$SigM zWmlt_CX7hi#rxaljD-Xlg&`B|Z8>}5J21Pr@yS4^Ev6JjFbk6mp4o_i9)ESgLP+7P z+%!WfvC0uy#d(e90eF^sw@z|>IGOHdEVa=(TA=!i8_G(qNo@Ofmx5xv7k}_PE97u1 zY>cY8dI2%2`6p`F7(2`N6TH3IOAlUdOFB%zlkx8Jt6tuxi>u|Mn;`x3WMk>xypb1x zJcqfo=nRIw6iUehO~Q4`OKJWVBC&v>H2i8*_^>va&{nBLV#s1Kj`cB41phc{kuo!U z!l1DD0ZZ;rpHhiKVbfD4($gE%Q0SA#uE}m~hybOk1 z$O~NSYzx9NeJpp!_w6^L&&6f84mOnXs;`HR7Nk>KPXQM%juB&K^#V6^gv9M)IxgI{ z-G`Q_*zT5yF=`u7NrPOYdkgZ8d4|`7=t>@Z9PxWLIt>=+L<0tcWT2FGzrq0>wOcu1w z$HKCjuY&5rSG)#2inqyoP3^nZBgfBUGHLk@a-KGti@S`krh8rrmqKIERPN>YXHkq* z@0#t$LL1AQHX@M8SCjw-CpvLF;d9bOrww)07BCBf zKoRAj2I?DhT!JAJ)Nmp>DM#e>G&(goHB`C%*!P}jEsy!31BxiD(@-r{Fe!;IgBO8k 
zvVNoF%kZ@%PP3If4lopc{S(RZm@reFqU0mSi>NY3@$lqxu6FC-HG&K69Bx62A?r1( zSA@wP^x+nTrpQ;C##{+hio>$VI0$lREwa&Z>;k|t^>wN^#?};hg+ChA;RT_}gLmBs zsf!Hp1PPB+By>M4f7ZyqNGlK~6~_1)A0Wx)NqCF0r@^t01dqB4qIIFtENpte?#0(z zV>|3r;a=~NxfFse7(}swhQ_?*{aa*#TwJ`|y}l`0Cum_;kxj&(!T6|1!KeFxoHpD1 z)I9H{PhPyy`i84mO%5ztEztz~S#mM3Lu52i875$ig$(hdV$;)mGRah_@Xsx_%7FJjfbV^!8(2!X# z_znx0LA(N+@ema-l!Izy&sn2Ue=xInpZZR$B(dY51C zEi2@?DibqPRb8gpl7~&g;9ozP4DF}`$Pub~cY&DjluoYEZZCnGS+sFR^YCm3B9@;u zytYPNfOjLWyU{w*F>f(C20jrECt@iN!FQy7N0JiUgRkOgI0l2%%lv0_8UdQ%J= zqY2B4nD%2z_yJ6`Q9JOKLH=eaRln@w7yz2IOe27w?x<>^wX#WBK}#&bfu{2uApp$| zO8{WKoC2WO&j1Y!JT7oL%A{la9An-l2Vh38?k2(mtUsVd)wZA;6F2N2m%(WtoCzZ# zv6<(T`hfuDY*CU%5-%`1-UX9pT=CWQHMw`>D>EAcg9w4AD>p4pgsF$!WF1gPgGZ{N z`Mo&n$|ELO>LdMIt|0L;gL!ob*@-V*~vbXkwRujs>t%&vxTiVwX5ZN%Sw;j1X~ z!?*O=MG)Oc{ z9vzs)X2Gv75IdfO(T!h)UtJ&TFd0jY0-SA}!Pas=D?@8t z5)PMJfgICVsCd14{iX35ENV*ESdW_eNqWDLL1r6yXL5nzjKKCB2G5p!(txEGI!A~& z=4)5jlA0r^R7_X6Vj9)av#7aq+hIVCc~r^>-!z~uH z?Le~3YE)4{=eAmc)0@6b&3&7GGN!$4i=#J!x={^?!w4x+yeO7ObZ&JA(lvZd6Q07V z^a*bSEaEqLV09W!4BE6$xU^l&&Tp4mRb-g^PaU>T^q_rRjp2wuPG-#U&S z0Tx~s?xbr{TglLV@B~De!2*lyA4%9)s?TsZCT8}$qccn z&_vAYb(e^`Ke_vvS*6kjX=M#sB6c;k3(PF%+4jsw%-qEgK@~O6l}_*QrpeAH-PP|O z5m?9+LOfs}^nGQ%H)O0`We@Gtw)NIb-Z?)=@Jwe$Ec@m|j0H&T!>8TdXI>16Iy4U>6!{|;X7Mg=!3A68ZaOq8eQ%#ps=FU1nmMylK0h%FcxBBJM(b{2(_ID&n>Ghs#TkjvjaW+eQ;(4bDt%T$^+sn4-)_FUX89 zx%s^PAzT&YR+r`94d)XRnwluF0G3-7)3kT?TlBpGj=5>nxW`Y-=^`?LUy=4#XdVjG zP0AyxsdcH**OAo3od_5r-`gc{$vf02w6aOSS#D7$w#;ZBgHIAMB7BwOT=GdsD+~Wz zD`mfk8CGE$W9C${g(esE6emL4olGpZpfWi`yamy5v>s9{-711RI#owS{3odBOD!eD zGn*Sz9TNTMi(kb@aT%Y2ctc}GjA)L86vQ1@beiiJ9B_&((!wfDuh{=|^B2VchFc=y zvkkY#@ydF-ed9P)!meaZJxkE4h#HfE)e$Y7JnYQetX6WiO(qL>sgK?2-D^F<1o<7p zCaEdVt$yza4v8!r#kc94E>7^1z2Zi26I&XcrOpmhyL0>~-{#$;GTQFuB^zMmBGJ}V z#LD0IhJmtHZ$;RgY036$Uf<92fsb!XqN((z%8t+sE)(%P!eQ>dINq6A=uf_YS^KxS z-kK5{sK}@MQ*&ps--%?9OutEq-oL@cZPxAk+SHbSdF*7O6Ph=noD@S!mF?2Br+dp> zOSTYHPN|<&=+-?qKi7y6w-MbEFRE&dU{P+jNV3vZ&q6{^#<0uE 
zyuy838Va$L^3-!cnhTkbg$1M;G2W<2jo}}yQ9p^^!D7sySUkb6Ed{>8Zbu^ph^BCBC?+*0k-pq*9zLOz#1)g80qCXeleYYg&hDZjwIqi zOg^y0N==X&Vm-0-Cr`tApBlmKnIaCUX=g=JS;uwTsmxJbxtY6aKc>Ob_TG{Gf;{&sIztot~yt6N`p-8ZY1FUz6{=OsU7Dv4h zC+4C9-Zq}z3*rvY$c@?QgUsJKz0e)^9{_&y#l&_1V8qaVW;HMit!ULGR2q#rE z)*=|shly%-M|fb#hqMmu(|7xj+ca?amdiUd1 zbj)Iv69dPMRhwSQ8J7Ex{f#kscn!$1Q7SpYdd+z&i53fg!32+0$i}ex$S67^ z6K?28B7uKj1heBnB-yte3oc)`#?dhS2+lx_2e#Vk<4yt%LcR+{Joee$JAmGm$>W0v zVr=Sx1$JU04j%?mc^eOkW9C+h&MjI;xv!;~)gLSbwtb4hcG!D9$wq#0VdlGFtS%D2 zuqSPwcgqa`Wc*WDbA}zV3)Y**$` zhYJI#r`;8)cKYEx?L!si=}CH>-{+f>Txd5nhn{v9(2?&{)-=mM#e865>`W3JD0go) zQVYSf5lx{~up=`N{{9|`Ia8`ndS-&>fMBqdZ!bgtlj88OQ7@}%A?zY@Q^Qd4if+hz z0d^~y5~Abd`)!oUL>eP6WOT++%(lei1R6yjc8W78b3??UyT5K>Q%CP@gGEshmmBtT zJ49vLay)rSR9n5_*jx@A#*pFc+K{DAZqdVazQ?e!T8E@!dnU(f=kcebs%hqC^;JdP zW&@cDue{Y@<+jgaWr+K5C5P3~3_|mkRvg#W{;J-2ndIEreCM1Op{#Q|c{+FdKwgs( zR(HBl&-el;#303}0QQcelVTL~Jl04tf{Z6WU6dN&+bt+7AJ$7G0F(ZD?)Gm$w4AMS>fqy}=#@KL$l6J`NYdqrSH{!&tsvMp%a_8GqEL#&DJO zO4=}Opkuen+d*Skq1R;_=Gr#}+?oIDPkm5Lj8>hQZkw@8 z-N3lnQkg&mLqAlg&#X-z*l2PPLD^qTyhAkRj^8!chNu+8Rk8{1aBG$Z_*5WkGhN{M zGYYr!%UKIKo$wu!MOl|{OXN=9mzd_QH0?qv31RA{wKSctloPqdx}oNg+J!)gbGN0c z;yCLjA4M>l;Di9);3T+MOYW06)pp@5)f~IQE^<=%ZaNL8y0bN5%ZC@GuO^bh)zVVG zDki2&gweeqe}z?$&ThbfmlV1?keXV87>4BF6}Hy{O~P08Nd1aVW&Lt$Y228Tw}Mz! 
zSZRhE&zYgeSfn2vLR!96h{ribtv{hSRDdhNPSujvX>Cu+u0g8~&xl67r4}l7^jV6B z%l(!|eJUgetAuxk1JAjn%b3kXjNpZ)^F(l(m?bFb6OJ=O*AO$xS7z;DB_d?2gAG!1 zQ`Yyy%*1?61>=KD8YZSI5910y6b}o~1CCu&f1$1o!@z5YPTC<@9l;t8)jPFzC}lH1 zve!WT$z?Q@$aRBmpY{49rk)|KVemrkn#6kl2b(uPX@5a3jB>u|G+{mnk$f8)j$v%1 z5gcd#{U?Ye>3fnX1}8;Aj5zu?#K-~l7_}9qd^weF)r6cCjw)c7RZRzW4u*;(*xvOF z2g4Dw?SiNYxo##F-^z9J4UY&G9v+5HxH$>sJ4DG{JItjO+&_AhktDJPKl?K(k<7F; zU+fDK-UfEOQKNv6?a0NMVEe9emNC>W5s#ITj&U)Qous^pXzG|4{9&W(pZ=*@h~fQE z8bLFhrOO+F-F#MsoxpakXktv^cQ@nf4BBhsG+{2jg{-^sD@oN^)~yP)if`v>1__%{ zr>jO02Qfo<%2$PI`;%NY?FeiNCu!OWKUac>>78_yagijnS$ooRP=J%Y+i}X;)HUh~ zg|>_dmD=E3jXknq^;>j`1gkw-Ri0X4rs=Q-u3oD|x>qkLv{^(yJ90XhimB8x4;Exx zx-k5)lN0T}st`t~v4`{R8Qq!{dq$<9QzrxKE|87r`1iDbjy-(n684WrRcbi8HN)8+ zDW#5qU8Tv0%I7)aLV5>&(RI0T^IWLH-2L7iBEFhh+_^)teCK^6yE2aKL&#kOa3ff( zu?Jf}n>pINs*67VIk<5zp*i#_h3e(8(<6$_?FFIru=fWoj} zbQ0;6OF-2hC3!2iX2jHcZwgCb+Wj%Q42|7G79zZ%$H=?qIlO02c!(TmvO{;cBpcuC2)EdrMT~oO# zUw&ZIhs$!u_^o@uCzN#@uPNdzR>E^6W#Re|P-(Ha$#rzLzp8&VC^x@36VNbNS!6lf z4*S*?$FQ`MrM8Zf>-h*{?)wPOTKf=?JGW=)!u))6laWe(x{*|G^0IOUt2jfp4xXSK zZ2mQD9v3zr1)KMS#ets-qCZB`QzujJHaE7;%EnST4ev1A(yH%t@ZOWmTP1`@{d8U0 zFo~+p(8VZ<$Es)M6};*Vz;q$O*Dol3Qd`*pq%`Jk&K5^dM?MIAW^ z*vPfL0)M{ddMEtA*2xsm;S|PsWSl~Jfh^gbpV0+Q6vTSt?ws9BjBB{q)6=d0i}{O+ zWLi+b@Is)DUMESmylG29x z$*j^`$f>jXIIe1PDum38iW&9`1cu7i>7wtWgp>{yi#?^h1q=kK60&{DJt{c0!^2ip zP1a8T{6-CjupXV;4PW;V}+{;xS3Vm3LeyEg))E?Ri4Gn*`UGXYa z>)ugXK}zQ&&M{r>uj%QfnzgAVNu3jtu&RjAXDNdMZ%r?kRSE)#7*!>v7J>T5u$nK* zBk(#cvl;XXiOXMfOHE;0Nyg_=C_5d5sm5Zy&L~ZV-8~s_STVJIG*^<+>9|c+danq( zC)%g9PJc+b# z)PI$+dCKqJ(fk1MKvVD{3^uDV|0+A^CV!G-K%TNsjC@h3)5PcWywTMOy@f<7oI+>C z02@t|>>yctG^Y~S9oX5jhfW^>{sYZ90;{~)FlPUSW!TN#Z`h}>X2o$X!R{*ow~b$q zRrgb)NFbW{yx+<1OgH2Oopv3%n0+0|A(U|=(OiRphtUfvL{7W5{r7Lbx=icF(eF>g z4@;UP|FpC2OLD2~JacHzJXhssUw76ZIG`1gQ6S0AD==Jh);Jlwem!-?A<{eK^b6-> z+UWROm(}Q#9NBs3#+6&n<@Ykzuhj={`LgqR9ovD~;B@_68uD+|uaA+s zIkWS86ir-r+3K%Q*KL@8+>m~^j->lL1cRB^YOC2xpZ=ipNJAQ{>fa@>V-5P+eI9sd 
z2~AcqNr!6tG?t%JZE6Y(ovpcc*hA3l?+agjJKGYjYL2*>TDa%3PZ|boZOtSUZE4km zuCT}_QjCTU%fpM8zIHL1+WRDZ=5Du!Hnm5je!b~gxUZRYzv+QKm2~Rs?E4ss1=ZuK z+@IJ#3P1KDtZ8t$6(V(O3J9x_miVjhp2=FNew&kzk&(xmHs=?mds`OPZI9@KP z{GnF#3u{N-WZ?Ix-J>6e`q;R=&H3H2zDuUEortpRIc~65X|ek12QLNZr|ZkR>?NUg zkGu2lMy+=@MU^1m5az2G(H0N4oAa5JNc~fl6yNsy-9xODCwJ|Y-Se#$K_!;$tn=wa z-bJ3}V~RG<0jftze9K3^+!S8ioZ&-hapK%TAMbV1RZ*>U@(l`JL^ zn*0xZdPAj(@29`kaD*^UFt?1D%O2Hmb=MidH4I`|u7C`D(lS$ait{ANEIhC>9;FD9 zG|@<`ATL$hQ0ZW=zi;ZP45R_z`d z_hT6+ZW@@E*p^Y5=r@PYgIP078GZ615t@Xdg5 zpDGsK<)8}GDHgbCfZxa`TUiU(n=So`aI3UIU*X7vo-Gw?#$Y+&ZiQ0L_^(G3az zDuv67WScVs0W{;$q1YAgHv2E!x86y6k}(3fO932ap$@ot z5)rUW*McZ(BIlE@VrvsQ`YmzFc#ae-tIeYT+!d3D(}rkFZxn3ACfC6JJQ?`&!)cRW ztNG(9yiC(k7N-Z%g>a3t7_Ze{jn{3RY&$Kt13yr%VlK0OOuHxWG7M>_Ctb#LUV>+`%2F z#|E3@=i~u$^YR${HyYYAjqG2vF&NtotPX(!^*}Jb9n1p69cq9Ml7>;xfI>nbX$~O! z^CD_6Dj6q`{h5CEm(K>0a)-*OL!X&rpnIH^0Dfd3*OXEhmdAost-|BC_l z3_Y9H1IbB9h?_V=%z!-4QU?Fx)IE>>^|)fsBsTkJBOpnLs}&gXyeLQ+#*+j6lkLXA z&Cm1y$##3*)4$nnnaOS%>a&C)qpiHH@8Yq;6US^U)!=1cw93Y@GP0v#%Xh>AsCXnb z93@rb3Q;-eSN!rM;;N~_gWy!F_Yp}XCqFpize<)fs~}l!@i-pNSf;r0vB_>7U9>p% zo^A%j-UlH4P}D~A!_3jfU)}Brjf$`^MuEdT0U+1{Rvc?<#R;#a;nU!~en;A-;awB8 zHopYk+%ZmwMs@<8vJ;zhsMrDTQX{v|u4na0i2C`2wLa4}sPw0P<5C)7lOLhd`NW}_ z;C#lc=XIF|!QwyX2oxe2bRJoQoNO%6tw-_WwnfGnlMrpAUN>>Hy#WV6<$~Y-bl9jS z@lxgi(8n*ZZaJ%)*|LD+={Y{Jta|`fWc!Uyeu1=yM)3x*dPatHS#fEKLP_4}s|QCV z;6<#Cjw)?O^bE1RPlM^Zy2_?wTPkIjz7u8n@u*Wf-Q z{>nAkFwpmiM3UmV>r%@n?FFE!5K=#T8)9cg05I5xd-wezWY`!42(M{#dFlOuyxGf8 zt$U5xf~jm_L0Dkj(K_2GpgrHKc>z0OX*#_D>^pKVGGo4dvt zOV)-jpd@|kLk^&4!~vpxG-Db&_Qv=rgK#q(F`ky1c{lp$(Oaug@v)DP#y1))BgxT- zt2%O!ey=#mFvSaJT|^9EqX+;1w=D6juL1W5fkGHMW;oeheq*LvOJIgZi)u)Iy*@iN4o0 zLE5peHV}{`aNfO6M5UDwmxU*IyZ=S4_;qRUUXJ;W-VQyKv>EV1qT!3GCGIVOCkizi zT4ASd6^<=Zt+5IpYIGn`Xa5NY9|h~DhK?jh1TXBPK$WhP6&!7JQU4+wKWrR<+z2)t z!XMJ&aqcu|IFx0vxcTr_Z|7pk^C_CbW#fwa^zf0Tq}bmnCSnXw5CpB|Cg*78@Z^=M zqP)v27MXlMNpFq*2S-WTCUDT+=FM6D+Ol{e}p?}2wi2D&qFxoqSdJXFM zjn7D5NI8_L`~8ZpBb}p`BVHp(3+@v3aU@pH`N`lZjQXnXg`rwp8W$S+<*Ak0Iqs4l 
zWKTNwudO7m;*OL){eMVujR%HT8igsw@fs)p0w(Ob}i zH3CajAnHw%E|b!GI(vLCBsdF^#?-6|>KrTs>KF*Z!!zaU8a`& z--kn3#t4-O z#Uqd-UMb>EB&2oOLZWqlu>`PSuzY9E)*;dE(X*@FYq+(vbI7z`wDj%`HpR14x10c< zmsO;(%zXQKIj4W7zuZ)|?K{s;Pv~4h^SB@T_IvH&*SI$^*QNwRMTFQlm5Oz zfjwq}WJ9aHnwx=J^v8fZ1T1f?ESZGfz@ie1(rvL(3&Uwcs>+v@Gqx6&@|O$_-Sea0 zyFZSFtl@2{A385u|!mj4Aw zY)TC4)9@#$PGL$%S>wLT*ze~rwlTKdfzPN8hnJg?XVmN7o7gK0t}xa5X&62cir4MZ3trD# z&sZxWx@BHyaun(m$>CeQrCOKrh+t)&*8bdfeT~~g`VOq1y1;{K3yDhx$CU=(yYviP?yn?>$_z zqmzE!k~M}L>Fder3FmKk1+HYXoU+&)jGJc9lm_X}ywBSW{dGGH#`V7FRedvt7HkjP zz8@|Y;PZ)M|4U&qP}@@ir=H&BVw98b@!{^PT+7eNp%)x*ov zXj?oVI!FJTuhHtLuvCdc!JU>!p+bR4e%WEmzU?|`SS!iMdoI2XQZ~>c%k)R52&Npgyfe{KZHTB+s8P@*(VsPdysNL@^LCN{BO9fq z^s&PHuyAhXyT|v+gVWy=nnmxmbg%3yjgN;t+iypZ^Mb>@Q@#1`$a$oBFMWq^o15hF zO6qpId{%vS{X-GFhogswS<+b`6^>>3E@#F+ItQ2CeJ2)w+Lk#KHKdTkhtPzO`(|7p z{^=46`503bUFU6aZD5B|pO=rfr@zEMmGq1DFnXU{gw(yRp|_xD{o~;A6~%Jepn(!U$pn00G+ zzvo~h5afQ;zqp^M%rGi$|NZpq2yzVlees}0QfY3O(xU3?yuz|?_w+s04UelC&~6hrs$do*OpwfLz=lJ1ct^XINtf=!1&`6k-c8hoZA{ae>SnU@a)l&Q?H> zg_DUZEXOqgyF915U@IrEi=DYG#2pBNTG^VxAUhMVlY>1FWa|k4h6$*dP2gbqq zzin`^|L1yaY#jgF1`L8#MgMa>PIkWk+#fq92Oq3f`S$#Ra-mdlg*v)IVZ&6NA?B_iu=8^-7aJ!V7mWAE7UY@5h$;U~+94HBKfWn$Cl>C2& z1J%JU5LagqOdt1u=zA9UXE~I>4wg_WN&yZY9!j8$wH*{T1E7o@Y;mN(Ac#5m*>4vp zY|ZRZ-P7v~|RU?!Yurz9?atL`ZX!08ggnr4bwp^?wb(1-=2F( zidw(cUsdeE@qdtLA1*omdNed>|FFYH;CDN;QvrN>JfDkb5p+W--}s%2Me^$|ZOG54 zJ!m{aW%Z%ILagoKyoTV@`=rN?tjBZX{R$rAr>D(pzsAO)IUr2QWk*Nr>GH1=BN{*Z z4ea=qP^d@aixijEP)DMqF^AhKBSRkNtvT;<665c^Dyznl>E5(_zD#q?XCccstfY%* zO*ad3ZEbNR#-di|bJsq1P0?#Q5OSgpK4YKj&3I$P)Y!4#?i)^S7h-~pa}sNcD}A1~ z4;$x`ZOz_KNd&(y1>7FKCQ=zN%P<+}ZzQg+EbunHkDBCNUg4GYa%gR_Ke}3mLY96` z)qx$-P8`x!codeZ7cJiTbH7TUpTD&|kpYE5{`!o*v&ej**kaLeqrm1nq_c)^JkWc6 zDMdWUXmUxn@nx)5o}o>?#~Z^pUez1q44@7P9<1PL6dQnE${Rx%YeR1p?hE=2zP{ta7#_J=n`q+!dxu$Le1%lkwf_66v{vhVqFEGmZ$PWd zIUZJfVXP+OOF05$CwpmA#rNJ;nOnC3Xxu+p$y1Y@ecKR;uYY2V*mi*S+0P%xTf;6S z2{_JOvwpE#K7PjhmC59<^5ekCqR)0??)T$R7|_rA@!;GgH8yGOX>4bt;rI1Hpr7Bx 
z?s@M}n6dYDZ>f%7qxVx<{Ncf(h`0!&K{b`{W9iz{VTX?l+2ie!*yC;5ToPo>;ORb% z!tY_1=T|eC$iEK>Y54wlbX8egTs>GUx>JEuE@FYwchIUJ0E zwNQn=bIV#f<;X;{f~$m^whQ_B!#W{pTW#++=DE*A5koxvir;ufKQlqi(97_1E069vvw&{^KjNin%vb@C-j>%^ru3 zQ66RbP3GzLyq}=p4N9jVwUw8<8d)igM6`UpEkr7{GR|tPu|n7C{po|YuIVdhLirU< zVbzc*DX$nTht`-Xb9bUiDFzXIGLKkRkAfyFy%O$WM*Q}naS2ydkI&>0h1Q*3Dt|im zxK~h&GbfPQq(=9v_I+J5trK)o@3YT-8R2swb%Sn7CqNSz#q5c#lgr3`jTedX53C8U zUIqQRT+sTe?Sx%X_R&W4IEVRhWL8dsDDwD14BVR~%~h*QaRPcJ(}Pw~iVx9q2T|lQ zQ-+B9qDsCnHBnq^;?_-O4hXwBBqI13=9z`h%A>dpg>E1Z1PK*n{$cVU<29&NNmhG4If7$5am!KD}<%MVVWC+mOVJ7=JC*Q?%)+OVX zMn}*;+~V)R*~_e3MT_8w&lHpZ;&;ItlI#qxc+1{&3U~5Dx0~(7>MFcRNE=-va-~A6 zF$%U`x`{Ns(4yv;2_7MKRrTOs%mC7rU+QX=)DI@QJe5qk_Kb9`(f)Xu;d6&LWsP&A zVJtyf5zg(_6g`_p%tkLww48+LQvK~r5{OoR067?RwSIIbA!;YwJF0b|o`g)Ts#JcP zGt0hVG+fUBs6;7{=&rD(VwAjQ0lByJ9yYC)t_{|`>lKd%<5{` z6r*Mro!`*G2+SKXy)h(KOgHOh<3wfiOmUTu2T0oADB@L^S=R#bFe|^C^uCYBMSW`R zRJZ}1A?!Z9(9S$z#ImsNLi*wQXE-p|=%}ESkXLOS@sIAXnL=2w|+4Y=2Wt`Ue+;E+3E5yB^5q-s}Q#Y0`pEx zFP17RmI-2S_gTXSj2DTQ6kiA^N`iA}?=sU&#YTkg=GT7^S0`y+d2iGS7J2kG?FydYU(qq?MvmFSySLMd=V zS%f+yz4VMXk@kSa%B19Dl7t(_r!?HG!_JS;)+*$pEtBS{pu8v|i{tkjx*%PV=0uloqwVQ2 z;q}gUjo4v79Hx)uiiMdD82TgS zDs)+pqal6K6!&%>&190SNU=3yf;f__;afFD!V4HB%bT6QY$UlhD;l z@Hd68QC$Vwve~_yFlpUVfb(_>5ntTO#Piy1Vd{3@(x-_;BTbd!e-)qH%+33eu3o`W ztpa`Wg%9aS4>eMfV6i5eF#oiql>@i*7Eupor2n|AlwWwtZz~n1P zCMTcyeje=`0;So0!JyhLFL&0fy2>euz-H%tge&#^Z!1e8Y_c3-T}XsP`MnGW*lE_3 zaBlHdLw1RSm#iX{Kv`K9kddJ4F<8lEX9^388ENYQS9A_Lh)`s(rX}*bDBo}OJ|CnPr*Z{tf=N@Gb z?qxATEt^%3B9AyWOG-aiaG@ZyTp@j6<;K#jm57lhU>E`oFpv0{q%)uNU?OPd1_9Ev z_c?y#%8Qlg}&STsFyAavUgo6f8I@Z=nOxBcZw=#EWs5 zdbaAAqSndXx+bj7)?IM>d?(KsR_Z&Ry2g+0I^L4z=`7hxa=v<7f z<-;4OUkH^f1m&P7^!)(*`D_)?^3Y%GH)b#7KH4dZl00a@lH?nP zKiKIyjp(&)XD5=0-DB1R!|ZPaf9Y9=!VQF~88C9C6+nArY|CDf*q|M`A^Mgx1?cbC=OFIJKF9h6~&=K2VyoZqZqxdQL?jQPdzRA6{AZa4Duk^5) ze;$0kG24pBzR02mUy_V(r%PqCRpOLA6!iTDxW+XlUh%9Z(tEV*>+~rdGhZ%x3 zNY;scMd%knn4Mizlk~6gHIZjXCc;UvaFNQIgj`n;27g9BmiBndhgy1i9lz6U^PAYW 
z?#^Wy9?oSe8kwY|87=hm-AZj9&XK%hQ3GN}11Tb<^vEs^Ue3Gm)A68(>+9`p^3}0; z5LxR`yoQ44)&L`wNI`CE1AV_y?*q32WHqLN_92SCpT-k+ofjpC?_)|}9=1MawEX&D z`>&i!I*vzfrn8L)ZrZ)QJ8OmAkQc*wH5%UdMN4KcF&8-9Zrsb9_N?$Yey=RKU)0)R z=Bi(G#Y{@JpVcla5VpPBvIdf#8+|syPN?l%nKHYidv)IROsMqM<6tEpC9C6g?Z1Ld zN5G#Ta{+78o_!z!fzM#2p$R&lO2kB3@O*G$@T6CXs+hZ8V^G70@L+rRp6)SakifPa**SX;S zuHU5+x9N+w6|!Q8*FpC_&BrIl?Gf>9>q>>Q*_*YYia@R;6SS+lb$+}$ z7o(7CQrxlZjCrGzTzBRY&>>E>oaf{xP^jobuSe)MrTBR-XSJ<)Kl!(H0fM|Z-bf*p zv0d8Yj~Yrhz!a*K*Qp!j`)1}}pkYIs9abS!3l}S;E{>#%`xsH5DQ1OiLH#mugqCpj zn25c`j>vR!Zz}`4IuE2v?L1ooRTwyFygV+<7VYyQJn-}E`_k&a&!tUQrvy3H_w=oe zb&X)82)uQ+*cQ<faWaNxL`W?Rw51qxf?&#bkZ_`JU%*IRkW~m!n=)@EV&&e zOK(9`afp#SuRr%)4xu6}?GK;13>0TTM z+wiMf`#$rf-pjS7RPwJUz7{-lHG*yTY(%uM(%~54Nq&HEqjNRfHiFAjsoCo+FBujb zWD~4z*#aD;Y8sS(RPEEHFl)4zD_MXmm{6@i#yPYbq0&M0p z&ko;|_K+6u7k(=12+y-T8jc+0=I{ag!9Nj=1fqwD>A;P}LnMN4NK@%Cn1r{#Y?Yoy zsuuP7hfq;Fqex`vZMrfElbH48L~Qsg54Huqsosh5OLjrDL5%n;?rZU_VOeNbfTWKJ zdFn*B)s-Y)VZ3>FBRH^)0v2>i+5KHb(G=JLilkvVR9OWsoXC+5Zvjf^-C+F7V*X16 zp{bf>MiE>MDXJzxICP=3(W70wPX5Zzc_kQ~&6k5l@BrO-QBWEn^m0g@mIoOSnqW|} zlS=4M)n7s4k61Xvr@_--Kv=szp5*?e5>V#hfKvmn+oK^g9ze@&Tm%hiWQb)hAa<WY11x-#eJv#v#;vQe1ON!J!=CHlCYZCT9g*T zC!hCk3T56P_{)!;2BiqF+b@T~CGX+_eQ_xUFii^fbAX{(nYbNrx)bRB11dvs_WqjO zw3^DVTdk&$Or$lWqOz9GKN31R7@Al(bV<1+M*WF1 zHHYgjOhQ~D$cHI|u3xTe0&rmEN^3w2JT|YOHg}DB6k4bzqW{ zMAjKBMwit-GbW1rDICeJI0bp_n~OC-o7*3BKBfK^_tUtHza+Z2yTbwi7G%!R=+j66 z=!arDNR@Ws3H}v;=NT%_{(|eIS$OD*RBSCehi8XXPi{kDuy~sBQ=f`WaW7D|+S zt!>wc{qd9ywS#c?;2RNPp*124|C6Y1p3RFdiw{iRpE6EMb%icUbz6!an8qL7Iuzm+ zCoW3~?R|0M{T3@Yy|g<}cO9-vhv~l<_uibG=yaL>HuA+gTsQQ^W8KDE`=Ryj(RbL! 
zlAA!ks^(4Dc+PxERh_mjf1==_yoOma^YkV)El0V8xq`#PE;otHjRc$T3%w)p8wCe% zkA(r_lP7W1l!B*WL5mf!*C;ZncgdNt2zmClJ~F70wHWPUuMuRH5>Msel~xmP%J^vW zd&Ym-jofF%vc~n>8jKv+57kf(Oob7f$Csis_GR8S@*#ewGd_%AZKoS0HZFapy~M`0 zFxq!SXv_?wv^C_1(aKLsnj+d{`re8XBD170!Y-l>Bg*7F?Xx7XCc20=j40o6x6cx% zJ#=eUD1eS?8>CUALayE;=EZ~i0=-yr8nFJ$~!x!zm($i{Lq zKGW6x3UD80`3XY>-7_5{d8`|qhlQDh=KNt8^+1l-CK9S`Z}|>u+!hq_^j*#FMBiDd zoBbTWd(fLg1%=J-&86ZK3e`rp*jxn9o|_&-Q2liXN+7s+;dPh2)7walSonZKfL6XI zTN3ejsoM^*=V;#FyxiMHZsN~1lhd5SnZthpbz{bU4Oqe0hK^jIjvAIJWUO!p9Pbf_ zACeJ3UPvSOwk7*f=BTVYv(BRtYY*?}ZO6)&`pPRa`=YSb#my@QhUuvh-l0nrWxq%G z!it}aspo@EG1(M_3$?a0|E8h!PP!>EQ&-|3enX%7I;WlBvn^vnM+kzBs`h z{$xPlJafJQTBf?iOP;~HmzcR2dwbaiXh@Cx7uq@Xx{T)pUq!X&KLv@VC)mb97`;n< z*3G@`b4+Igj>X@$heT`#k0Q}#COP)iQ19rSIvaeIJE&o>-qEYHn$OMuCiB&qRabV| z2*wkjTcAA{558wgfC;kWrWaHHdiR+${-@qo>5%&p<-#33cR>B3nZlSeE8$Y@41U_B z_E$v3;Jn{;!V!}-252f`?StDP7C}P`$Uu!zlk!Y#rnP|Hq-FK36E(6)o3v@@bwC5D^l{`$$9pq5g3p?eTofOtE`T-MJtSzrko&-ClRzF&XEQC1hd<{<)L3zr@&nnE7MniS$?!%_fyhPis=Fh2?Oc2<- zLSm*{pXUJUU0G-`JC~ubiO2{nvDCuPgIv8UM@b%iyUo(1D^NYORR*3zFt&0$F0Fe* z3M4PoqgTbtIDHH*p)O&e81ZKkhO$JzM#uPr~66m4iw87+-g#1{FGhtv8QA%1Y*6cSrPW?Jxd{9 zargtl=(TBa9N$_t6W(oev1;#D*B4GCt50~sSTUi1>H+*`)CIZQf*Geg+< z`SgB%TE)~+-{yKOHi~>00!0b6-P@;g{lQh@3C{i7y9o?*`$0%;V}oXyye8#1C7*2* zdMlAE!R+D`_02u+-bEHsD;2B5j?P-syHQ1q3^Z!036VPORSRG#yTdIpu4koGc1p4n zf_=Vb{+onzOE8D=9g-WoNb-vSr7Cxm5(Pb&=(~65#s_piQ7=!anM$h(ZMH(n(F>nt zFF8t87%$f$GZlko%Z0#V^`|nH(4*?XK-A0idj-9O-kU^Xd`(6_ZTyP@h3p{8_Y`RWf$dAk<01Ve;K3~%-%35lRc=f8gvE*QDmcXHkA#U5n`=kp=v!# zj`y?7m5c|uB?)s@v6yz;JUidN*|PK6+Y(%bjTzD#{9s-2vA&w!&%Q4AVeOq(S)-&H zM;oFNCz(Px(XF{OhYtZWVd#`YDScrDZ_^Wk7EX}EqiAoBw(f70S!}OwTDgM#vd~1` zvvSAc*Z6T|b3>L86f38%jvCTsF16j;*F+E5-Z54f4S#)JrCpz0V+v+$li$}@{&>VH zrW8_X=`K^NoA}qAaD`}cQaJutDb{XF!1@#??FD^o zk^*#oM4Jf)-kPsJ42@T8_a)gi4$N&gCXg$Ym>5O_V7FwO*sbxv?Y;_oH%*2n0qZL) zBfPRvlDj(pga(8BwGQvcHr!F z+hpD@T#$kva3BKh9eHBg3^T~LtdUcoT#_en`<3+zTZzX*kV8PUr)KfGj|X) zyN`a@>Ejj4GA+JmySdenOZT0E81s&3=htYNQAFyWbGypbhI!LT`>T=Z 
z1;!2LU((@@UHc`x4R{e_kPbmG`5> zOwm5M-@lhSvhT-J(G}_OudeB?&x4&^gn^4^ zk3w$zzZX`$TMP)A+2Vvg>M&{tza(3q6#4oTv zu|HHXx2&0iQB(0GA`Fx9B)oW-gPqovP;)|9GVMbuuTCl@bSJ{x^r^V>wqNT}mDNh| zPj}A4Q>sgdBXJGwCAFF&gZ86 z#`-_a3=5pt z^S=LhZH9McLfeL^tp4a!BUdGNCG1PlsguNGZThz`-U1V_%ZsIQ#vNoXOrHzAr!Lx3 zit^&q*q^ioRU~#cNNt@C)!Ya)Sr)gF^-3CJ+X~z}xL#*BuS=(Ea&wL!Qay4xi~h30 zWMua8Jd!}Qj#NdWRjI3CGHS)^k^06ky^;$?g#YcJp~&@%%kdM}51ZIqs9`rA6_QI# z?eGSPsD}qae&nz37D?;B%Ggxj+ybMg(&v{3=g5T01@ISwbrj@_hrMZAm7)7s&bd)5QLZpY-89)g`G_BMQ0DFbY1SyD;VLyEB}0nN`T#up0q!+V zXJ_yf>Qu=op)2<~_6Izjlicm(eE#%8M?L9S9zRaoCROF6VLh_x?`1QuGIbS{+`dUd)GozM01DFYM?An}eqy>q@7P8v*HidMAB*uze8 zm_@%I6{AfSATZBVM4UU-{bisK)iX~x0PEYZvoY@r^p{p(VRUBuc=p;!nQ=faY{ix= zLm9P-nlAJZP&=An;*(robIdPG)S;OXw43(&vUkk(sma@g3_P_sC!W8fVeUjUXD=fs zVpH2zayGjxYmx1FUxsbvRe@(H%f~Emc_4#E$40xUvn-X9rE0w}ibw3ZEGQinI(u-=z4yyZ&VF~B}%cldg)7<}KU+e^UW03wfe7%}%~zmhuh2!- zNMND$u*$pf+635lMgyOFM3omUm5wG2apXHx&f9k}urSaCfmBgLY(W^OMjdj ze7k_pZTnjTzn0)PRx4S*1Pp=nH<;&dEYdU7=wDEvf3QgZf*=7Et<7C1^(mjBN9vS@ zFo+Vw6$Ulppad#|&8EBhE{q;8~uYWf`Oty8ek};J`e`W zf>{8&Lk&@ZGBBhPrLZtihLe)xS&2Ffal}Q*@r=^?1A78WyF+C)pw9p;Vc}=ZFza>@ zXAMVF&>s%aMO{{clII`s{{l)qvz|@s1LY+pB}`qw=9Ik8QilIPubx}~OjrCF>*Vlk z1Skc5Zw&%JD+RuV!Kr|MgQ7Tj1bF{HK~c{>{TmdOk?5wWv49^o*2dSS8uv2%^SG^* zI=mc4n_Mg#69>vmh0ad^YF;T#M=7<~!WWzj>;8F?u~pRJA#iF{hkp@CO*=W_k|fHT zmlLhEdYp`;tx;V2+Ge$ltyrG;%r*l)Jp>_sQ_@B8N6*p4-Pr5>5EWr*f((a#3P7*} ztUK1!NZ{Ydz^A}_&qvy&;M@?jHDglV-ZM>!MRoyxXMJvZN6i6HO^)0GFDav9wOpKdSHZ$$}Uw@1dB z5E1OXxM|{Srvn8+HHjnJrgoQRFFG3C98-eGo3zNpum9P=qe??^YC0TyEq?{Gcr ziK?lE=J^Hq+3i{F+Zm?)3?I_suHPpb1^XQnNm1N%U+MU!U;ru$!F3CFVfNN|0K-E# z)t1Mw5fdOFyt>^L)5nRt+1p6HXOqQ}xpaBiKoKs>mJzsra+ihaCu!_Grry4CvSaFg z5zik7<&ZSubFlC%lj?1T z3?Ke-GT~Sc#Qg*(MM;3`UzbhzIk4P>z8J|h`xSQ(uOlHe0JP4{kIWV@Wb)TCoH{#Z zra$8;>hPPF!6DyY;;Dy0DJ8#?R)^?*BH2bjlzgR1`uPQ|q=Xzi-kZa0^~f%uS5-5HKSN`T5FoY2KCn~Wf|Mtk%`fL0F*_LC?kSR zMkhOMoNT{l|J5b`9Uf9p4k(f%n$S;)ln$>3zJ{oVWGY%9gs3mlw6IFIOmLR;FdU=T zxzfyzqaM}kr5CmrqENI?(Cba8V?DpIf$-Z8%spD`dXDstI*vGvM6KAXFHa(|dM{3g 
z&cHV^-dMgb#GdFgC39FFO zw$rM~Sw;Q{RZuM> zUU+zhLajokLe!uW9wS)}$ppgwk4w%w(>vEYAuU!d6|MFIx*6UX=8}q;-C~d8vl)fy z#Tl9D)h}GrKGRdv4PV^8tWCYKPPPuV-M0BT!C!g?#k7weB0Wxz6A0T|o(<+1;eO4n zV|6@5I-Wi|IeR`eI9W0&nD}i zx7#rEF|;BoGRiEHm(Z0+gj1UflYNxWfv4S8g4c=foyDYy^9U|zkW-m!#oluPowtTh zk)xD@gfnb}Y#4F3Wh9Js9RDr8L z!=05qB*S6F%BMfn497~tY6^5wTAso>SKoKFXmD<@)>OLpe$2u2=Ecl&qfrV5yuJ~36$;

mWn`F{1ONe^j>JzCHDfwK_JiI)d+_XI7e)s;* z{c@mkGo3!8@Tm_tJ+A$rt-P(Y%_4$3mgOc#;V#h}{*628EoqMkHkMi4^!A$@>|WY* zT3PIgj~pLaKb{EWq$7=9PWl(rDHb)_9kU(dWFkY5A#o>zz&#b3Cb|TA5ry~4tC_+n zo0;md=bD}PnrN9tcSW-W?QhYPm$I%HMd;c@5Rzh(oiycM6{6g8)^S~MhSdz$gxBN` zr6(O3@Y14U^MEB6BC1<@xfjN!{d=UUjW|=c5;fv4==cP$yGgW`#A#u0#7x!iY|S zvCm>$Ex2+x$8Jn}SbJmkaA~=Lj6sKO3`ns=Uft$`43Zs)P7+G`*{W))qt*~nsaUP3 zM`W;I2v)7DJMeK;_$3#mqwG~~aa6cC*W%Gq@$+ncO1nr)NAKF9!sKMcv*T_QDK9kK zC)r2fo}5>P@5*oFuDMA*?@R5EZr=^x9|0c_d`6;2Mp#o>!R3yn`L5?C-@1m@-2J9j z`s_*}MGZ;h@L@Dz+@4f zzHqVUJ(ls~`lp!3>aT|?v{8a+BXh%_&D)M&TsnQTw~+5C1d+&)gu+O}nt?)w$3&TY z-kag~2~#?46`3vgjz^QXbhC)>jB zQ?E&UxgV?PO46r#gcvZZwW8Yj*B8yvV0M)gD>v?SJo}??=%$k?-u~l4luC2 zwFB%4z;oLAUo9PL2XO}%>wg%iKq+er3os1!|BUO`r{v}V+FLugx=;%60-anTP_P}? z0*cDP%?&h%z+Ni2xL8vHEuBr@!*X0xkn3}*3$k_wx!POUf!!&A&sP#Kmc1#+8R9?* zG;;<&3&KX4f`ea~-^$g&($v}2 z-p@ywh5ru&tP&7A z^Z$z#cW1B#Dmx_yCo22DKa{*YJX}1K7L@?+79dtdr?CP)Ufsh!`6TjDDPkaft84V3OaQN1WNh*(&f*u;sP~whW=SC R4o(4f0aRLAX%!jN{{uC$*Bk%< literal 0 HcmV?d00001 From 88c551cdea99fea47395341a04bd59d37d715916 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 12 Mar 2016 16:23:58 +0100 Subject: [PATCH 45/50] Added tuning results for the newest xGER family kernels --- include/internal/database/xger.h | 64 ++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/include/internal/database/xger.h b/include/internal/database/xger.h index ccc7a06f..d57e606f 100644 --- a/include/internal/database/xger.h +++ b/include/internal/database/xger.h @@ -19,6 +19,7 @@ const Database::DatabaseEntry Database::XgerSingle = { { // AMD GPUs kDeviceTypeGPU, "AMD", { { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } }, + { "Tahiti", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } }, { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } }, } }, @@ -31,7 +32,8 @@ const Database::DatabaseEntry Database::XgerSingle = { { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 
2.30GHz", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } }, - { "default", { {"WGS1",128}, {"WGS2",2}, {"WPT",4} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } }, + { "default", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } }, } }, { // Intel GPUs @@ -40,9 +42,17 @@ const Database::DatabaseEntry Database::XgerSingle = { { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",4} } }, } }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"WGS1",256}, {"WGS2",1}, {"WPT",4} } }, + { "GeForce GTX 680", { {"WGS1",128}, {"WGS2",1}, {"WPT",4} } }, + { "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, + { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",2} } }, + } + }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",32}, {"WGS2",1}, {"WPT",1} } }, } }, } @@ -55,7 +65,8 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { { // AMD GPUs kDeviceTypeGPU, "AMD", { { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } }, - { "default", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } }, + { "Tahiti", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } }, + { "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } }, } }, { // ARM GPUs @@ -66,8 +77,9 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, - { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",256}, {"WGS2",1}, {"WPT",4} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",512}, {"WGS2",4}, {"WPT",2} } }, + { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } }, } }, { // Intel GPUs @@ -76,6 +88,14 @@ const Database::DatabaseEntry Database::XgerComplexSingle = { { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",4} } }, } }, + { // NVIDIA GPUs + kDeviceTypeGPU, 
"NVIDIA", { + { "GeForce GTX 480", { {"WGS1",128}, {"WGS2",2}, {"WPT",2} } }, + { "GeForce GTX 680", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, + { "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } }, + { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",2} } }, + } + }, { // Default kDeviceTypeAll, "default", { { "default", { {"WGS1",16}, {"WGS2",1}, {"WPT",1} } }, @@ -91,7 +111,8 @@ const Database::DatabaseEntry Database::XgerDouble = { { // AMD GPUs kDeviceTypeGPU, "AMD", { { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, - { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, + { "Tahiti", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } }, + { "default", { {"WGS1",32}, {"WGS2",2}, {"WPT",1} } }, } }, { // ARM GPUs @@ -103,12 +124,21 @@ const Database::DatabaseEntry Database::XgerDouble = { { // Intel CPUs kDeviceTypeCPU, "Intel", { { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } }, - { "default", { {"WGS1",512}, {"WGS2",16}, {"WPT",1} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",512}, {"WGS2",8}, {"WPT",2} } }, + { "default", { {"WGS1",512}, {"WGS2",8}, {"WPT",1} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, + { "GeForce GTX 680", { {"WGS1",128}, {"WGS2",4}, {"WPT",2} } }, + { "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } }, + { "default", { {"WGS1",16}, {"WGS2",4}, {"WPT",2} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, + { "default", { {"WGS1",16}, {"WGS2",2}, {"WPT",1} } }, } }, } @@ -121,7 +151,8 @@ const Database::DatabaseEntry Database::XgerComplexDouble = { { // AMD GPUs kDeviceTypeGPU, "AMD", { { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, - { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, + { "Tahiti", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } }, + { "default", { 
{"WGS1",32}, {"WGS2",1}, {"WPT",1} } }, } }, { // ARM GPUs @@ -132,13 +163,22 @@ const Database::DatabaseEntry Database::XgerComplexDouble = { }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } }, - { "default", { {"WGS1",512}, {"WGS2",1}, {"WPT",1} } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { {"WGS1",512}, {"WGS2",4}, {"WPT",2} } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } }, + { "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 480", { {"WGS1",64}, {"WGS2",2}, {"WPT",2} } }, + { "GeForce GTX 680", { {"WGS1",8}, {"WGS2",16}, {"WPT",1} } }, + { "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } }, + { "default", { {"WGS1",8}, {"WGS2",2}, {"WPT",1} } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } }, + { "default", { {"WGS1",8}, {"WGS2",1}, {"WPT",1} } }, } }, } From 99d309598d5c076f721f3ec738ad3137f134e0b3 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 13 Mar 2016 10:21:33 +0100 Subject: [PATCH 46/50] Updated Travis script to fix the fglrx=2:8.960-0ubuntu1 issue --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f523e20b..9d5dd27f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ before_install: - sudo add-apt-repository -y ppa:kalakris/cmake - sudo apt-get update -qq - sudo apt-get install -qq gcc-4.8 g++-4.8 clang - - sudo apt-get install -qq fglrx=2:8.960-0ubuntu1 opencl-headers + - sudo apt-get install -qq fglrx opencl-headers - sudo apt-get install -qq cmake install: - if [ "$CXX" = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi From e6acf132968b3f3087b8b38251d89b265699024b Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 13 Mar 2016 10:47:53 +0100 Subject: [PATCH 47/50] Updated Travis 
script to take into account the missing OpenCL packages --- .travis.yml | 66 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 14 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9d5dd27f..d7a8e0a2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,29 +1,67 @@ language: cpp +sudo: required +dist: trusty + compiler: - gcc - clang + +addons: + apt: + sources: + # kubuntu-backports contains newer versions of cmake to install + - kubuntu-backports + packages: + - cmake + +env: + global: + - CLBLAST_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release + - OPENCL_REGISTRY=https://www.khronos.org/registry/cl + - OPENCL_ROOT=${TRAVIS_BUILD_DIR}/bin/opencl + before_install: - - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test - - sudo add-apt-repository -y ppa:kalakris/cmake - - sudo apt-get update -qq - - sudo apt-get install -qq gcc-4.8 g++-4.8 clang - - sudo apt-get install -qq fglrx opencl-headers - - sudo apt-get install -qq cmake + - cmake --version; + - ${CC} --version; + - ${CXX} --version; + install: - - if [ "$CXX" = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi + # The following linux logic is necessary because of Travis's move to the GCE platform, which does not + # currently contain packages for fglrx: https://github.com/travis-ci/travis-ci/issues/5221 + # We build our own linkable .so file + - if [ ${TRAVIS_OS_NAME} == "linux" ]; then + mkdir -p ${OPENCL_ROOT}; + pushd ${OPENCL_ROOT}; + wget ${OPENCL_REGISTRY}/specs/opencl-icd-1.2.11.0.tgz; + tar -xf opencl-icd-1.2.11.0.tgz; + mv ./icd/* .; + mkdir -p inc/CL; + pushd inc/CL; + wget -r -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/; + wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/2.1/cl.hpp; + popd; + mkdir -p lib; + pushd lib; + cmake -G "Unix Makefiles" ..; + make; + cp ../bin/libOpenCL.so .; + popd; + mv inc/ include/; + popd; + fi + before_script: - - mkdir install - - export PATH=`pwd`/install/bin:${PATH} - - 
export LD_LIBRARY_PATH=`pwd`/install/lib64:`pwd`/install/lib:${LD_LIBRARY_PATH} - - mkdir build - - cd build - - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install .. + - mkdir -p ${CLBLAST_ROOT} + - pushd ${CLBLAST_ROOT} + - cmake -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR} + script: - make - - make install + branches: only: - master - development + notifications: email: false From de7e68e872e9574153f260ff8aa914892f60e490 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 13 Mar 2016 10:48:42 +0100 Subject: [PATCH 48/50] Updated the README file --- README.md | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 87c7bb53..d3d77c34 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,9 @@ Use CLBlast instead of clBLAS: * When you care about achieving maximum performance. * When you want to be able to inspect the BLAS kernels or easily customize them to your needs. * When you run on exotic OpenCL devices which you need to tune yourself. +* When you are still running on OpenCL 1.1 hardware. * When you value an organized and modern C++ codebase. +* When you target Intel CPUs and GPUs or embedded devices Use CLBlast instead of cuBLAS: @@ -102,6 +104,7 @@ The CLBlast library will be tuned in the future for the most commonly used OpenC - Core i7-3770K - Core i7-5930K * Other devices: + - ARM Mali-T628 GPU - Intel MIC If your device is not (yet) among this list or if you want to tune CLBlast for specific parameters (e.g. rectangular matrix sizes), you should compile the library with the optional tuners: @@ -154,10 +157,11 @@ These graphs can be generated automatically on your own device. First, compile C Rscript path/to/test/performance/graphs/xgemm.r 0 1 + Supported routines ------------- -CLBlast is in active development but already supports almost all the BLAS routines. 
The currently supported routines are marked with '✔' in the following tables: +CLBlast is in active development but already supports almost all the BLAS routines. The currently supported routines are marked with '✔' in the following tables. Empty boxes represent routines that still need to be implemented in a future release, whereas routines marked with '-' are not part of BLAS at all. | Level-1 | S | D | C | Z | Notes | | ---------|---|---|---|---|---------| @@ -226,6 +230,12 @@ The contributing authors so far are: * [Cedric Nugteren](http://www.cedricnugteren.nl) +Tuning and testing on a variety of OpenCL devices was made possible by: + +* [TU/e ES research group](http://www.es.ele.tue.nl/) +* [ASCI DAS4 and DAS5](http://www.cs.vu.nl/das4/) +* [Dividiti](http://www.dividiti.com) +* [SURFsara HPC center](http://www.surfsara.com) Support us ------------- @@ -236,17 +246,8 @@ This project started in March 2015 as an evenings and weekends free-time project To-do list before release of version 1.0 ------------- -- Increase the functionality: - * Support all routines supported by clBLAS - * Allow the user control over events and synchronization - * Add half-precision routines (e.g. HGEMM) -- Improve host performance: - * Allow initialization to pre-compile kernels and store to disk -- Improve device performance: - * Tune for a wider range of devices - * Allow users to define custom tuned parameters -- Improve the performance comparisons: - * Enable comparison against optionally: ViennaCL, cuBLAS, MAGMA OpenCL -- Further reduce the likelihood of crashes: - * Add checks for proper command-line arguments in the tuner, tester and client - * Test in multi-threaded environments +- Support all routines supported by clBLAS +- Allow the user control over events and synchronization +- Add half-precision routines (e.g. 
HGEMM) +- Enable correctness and performance testing against a CPU-based BLAS library +- Test in multi-threaded environments From dd74450a831f431d3c94608e81c45af1e4a68a22 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 13 Mar 2016 10:55:16 +0100 Subject: [PATCH 49/50] Updated Travis to reflect the changes in the Khronos website --- .travis.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index d7a8e0a2..351836de 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,19 +32,21 @@ install: - if [ ${TRAVIS_OS_NAME} == "linux" ]; then mkdir -p ${OPENCL_ROOT}; pushd ${OPENCL_ROOT}; - wget ${OPENCL_REGISTRY}/specs/opencl-icd-1.2.11.0.tgz; - tar -xf opencl-icd-1.2.11.0.tgz; - mv ./icd/* .; - mkdir -p inc/CL; + travis_retry git clone --depth 1 https://github.com/KhronosGroup/OpenCL-ICD-Loader.git; + mv ./OpenCL-ICD-Loader/* .; + travis_retry git clone --depth 1 https://github.com/KhronosGroup/OpenCL-Headers.git inc/CL; pushd inc/CL; - wget -r -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/; - wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/2.1/cl.hpp; + travis_retry wget -w 1 -np -nd -nv -A h,hpp ${OPENCL_REGISTRY}/api/2.1/cl.hpp; popd; mkdir -p lib; pushd lib; cmake -G "Unix Makefiles" ..; make; - cp ../bin/libOpenCL.so .; + cp ./bin/libOpenCL.so .; + popd; + pushd inc/CL; + travis_retry git fetch origin opencl12:opencl12; + git checkout opencl12; popd; mv inc/ include/; popd; From bf4bd072e21a4360a3069d5cd7118a60df583d1c Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 13 Mar 2016 11:02:40 +0100 Subject: [PATCH 50/50] Updated to version 0.6.0 --- CHANGELOG | 2 +- CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 3c91c31b..f0648ebc 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,5 @@ -Development version (next release) +Version 0.6.0 - Added support for MSVC (Visual Studio) 2015 - Added 
tuned parameters for various devices (see README) - Now automatically generates C++ code from JSON tuning results diff --git a/CMakeLists.txt b/CMakeLists.txt index 508dad72..d26a2843 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ cmake_minimum_required(VERSION 2.8.10) project("clblast" C CXX) set(clblast_VERSION_MAJOR 0) -set(clblast_VERSION_MINOR 5) +set(clblast_VERSION_MINOR 6) set(clblast_VERSION_PATCH 0) # Options and their default values