From fd107c9b12e2c369ed2946efd53ddabc440388e1 Mon Sep 17 00:00:00 2001 From: cnugteren Date: Sun, 15 May 2016 17:28:22 +0200 Subject: [PATCH] Added new tuning results for SGEMM and updated the performance graph for the Radeon M370X AMD GPU --- doc/performance/Radeon_M370X/SGEMM.pdf | Bin 13227 -> 13268 bytes include/internal/database/xgemm.h | 4 ++-- scripts/database/database.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/performance/Radeon_M370X/SGEMM.pdf b/doc/performance/Radeon_M370X/SGEMM.pdf index 362d229d5eb09dc75a86a4ef0587e9c1b110e544..ab4382ce431f3a11e87a85b7df82b92646b40a5e 100644 GIT binary patch delta 9451 zcmZXZbyS;A(C;bkP+W?%MT%>2D_Yz=IKf?t^B@IUv=AJMyA&zz?k>gMgIn=(!|%Q4 z{&C-&^PI_kcXoG@b26WuNvY+GSHJMgpuZhqWg1^G)o zKMWhJmjJFw-F`7i@%0bMY&gWPF|2Kyj*{3au8UV3M|-41_iwhnUmIk~H=X{{p1i&n zn`Rq--c0acJ6#jH@N@KkzCZbGBxzHn4YW8l+g*w-THPe9hLtEDUqCl5Ontn(u9KQG z+c(44v^o18LfHmyfNYn+h7}#2q+|cj3%<4s)wK_F;L;?Psrf5amYk|OC(+PBt8yj; zLbFz?sO#Fbq|s}}rnaPq34@B9Lj>U}ZC-Y-DpUjN#w$|fJ4uX#hSz>#m|)9;v$xYIOdPE8BBZ%bZ?0{tM^c^DdsYYsA!NYDo*9pEbiZZ z(q#xxJcxjmRle5$Aw+_I%_erS#W8HND$}vt$?PQz=S>2X=1STd~iWXncA7;m= z9X8-!+8T(`a0;J|?O)sT*|Cv__tt3(Vribn#C`jht8sB64JkgK~_tIR(4vUD4mC07+jyw z6?L+lJNuYxk-wQ>*tpmPBYrl)fG(=BrtE%Tu@9P~nEu!w=s_2;T*W_3f-ZI5v|8vr z$UjWSwl2sM_r#!6j(+-3H)8eX+^`X-duU-W#5tmIiSYEO&T^FHCLrVezV6;Bmr1S+ zWN}o8H|~9h)ObLz3a(gcD&eCL4Efc6U*~wPaQL_?lNg(SS_=}ePLjdFZ}~Nkzw;@R znDsr_z;0^(?0);YJ@Kj(hsEtjK-Im-@PI6f>&xM=(3m4Dpono&d zeYL3D$gpNZFM{v}F@3MLDs(<}&< zleR?U2DrRD_|5ujn_GuKD2twFU$asq-BpP5OfN*hi{uXb2)}t#4#fn`hA_E#XD9C> z^I>|bSa7@Iv7$7E+t)^%6#rcQX#ejsI4sF6&C3%M%}b>t?ka*?k}^OpoO%i!g>=PW z^DsiLo5xGA%$(%PU}grIX@rtNSfrbYebw`6Bkoy01(j!5vaq2jWF3sBtT^S}NBwFj zwOm9$t6pgqV?)EXS)x$|!9@gBh!+{cZFuS&W*(0z^zn3!0?2`{x513SYfe#l+f87G{+7yi+f#jo><8YQ!upv#JSLANN;ej|i6~TaWLJd#QkQ{mv4s${aA$go!r^qVccE zUGjFCIuZ%vJROSIHYBFO&+w^mOq58yZHarb*}?uD6UaK}4Ida>Gr4p$xmljOoTT6L zX;F6ODphv*=IePq<5qQGCqncl>d&ll7y5~G2siU_lMC~VHLFxMh|kaLT7GSgF6kS) z3tJIY4q*Z_IJ-evs+;b~bGW0QHKDB6C-EiYqy}at^A|Z7~svY6lnd0&z``B6~FmL}1 zav%pyL@H3bMa+{`)G|m@m72%zLU92xGl=lG^HKWo1f|mS>E!+^%jv5!^f5^wse`c4<+q!kS5(R5+7B&R1~nw%?H3<0cWe4`fH4QXk)P^g?>sBm11r zfQRodjaPSZAxC_9`vK@75??^+eNDJ=NZzPG^n~ccHSxooqd(s<@CV&$K~}Bv^oGs< zX_`%&7lD2CX@ta+yTNRcy-=9lxfbb;`y9h3fiw%ZapKu}tsNZOuZh{sY!w2WmWB*O zNEKW&(bubZ)n>`omlM5XBcg=jM$rCo_VuVgbG)0d`m8jL6-IL;PDa(vUGRFWpS#AX z;L@v+KiM?Iw(7G&Q{x$)QsY|!vyfp6^=wZQ;Sq0U+PqTZk=l^Kh$YIjHQEm23)5Fg zV|d2w3oM+%9$14f9PR;0Z~`#k*65YJC4~@UEa*0c3`K<4ZLh%K9kl8P^Ex6zvTyim z4x{tb7u1(U>nlk_Trk29f~`4VOB&n~Uxsil6|p=CNh(VwLbtaKZ)p%Rhujq`(r*ay z{}`cw7Z0Y+It^vOLw8g2I7U+7DVRlr*{-`OKHvvP3OrV?C!Gcp9IQSb`CN9kw)b9+ zYE|yEMh~Yu)(5SexPx|myG`b9v|x7FB*mAB)wS&ay{>-FIfXp+S7H4Y=U&>coSlmQ zYnSwd9K7DtDJY+Yh~O@7usp_BrNnO*qK=sh1Wl&w**S$~{8cvbI)>Okd;(O-%DUm2 znOT*wGmRra^Q3RZ#-gGJBD&oAQgB1-NCVcKozjs`^!47-zN;n_8Rhz+9sL_uNrSI` zJw)0Eze|yt_y)Uf7bIg*H11|7>rIZLt>m;qcX=0$^Vj6eOOycVq7Q6?U#R9lh)wq8 zl`X~Q$xzL7sV^<+S`M~CRRA!~sPUz?u7ka`b3v_cU=<7b0(qNJKdFKxmfi#HO*9_0 z<&E5T-*99F`t)RUKeu=u7Cr=G>Dy38h7bd<-k*LUJab-^!OZW+)b{oFQT5cojwlFT zfy0=Vm(1)Glk7-)tlw`mJYC5|+j63t&R>^x0EA;bHn>C^-Gz3Cr|bao{1)w!chYaZ znu69L-#iBcc%PQZJc0e~K;eji=Hc%~L&uuC0@ANV=W&T8$P9#xBtYnN>ZjGE-A6FE zkgA7;QXeNSTtkZ6*Tr$YGF?zN$+&zVjnVTQDePLW3z~yniT+8(dL)%LKwt-eWOiLO z3oc_?WOnCshaTePfvFo6m^*glT?e_QgL*pE{B=T}Vj+mb>`T4&3)FD1#pxPiIJ**i zw^zU-qo*)l%yG=u$60Sz0eHW3daNZ^4Olx)R*p5LDMA+cC$hd;h9Cs}59!KI@~qdE zqcn?t-reeYRWF-Rnsj|{QRa8pjD!8w8?kTyaRAnWC4~YLIJ2-EFn@`h*t=@}3*mVA zuKt@pgAUhk7!`Fv`7cDvt$N1BZ*}rf5$z~tdEskt;F1Ik-~#Km@{jr(j@kYJhw|Gi zJ7w<4et!jQ);okmAY0yqPZKaGQsR-P{q5fJu1AXU!O#nG^nygN)zE=M>GeLmUi;*`{L7{8LGP}*^8=l!3yAwnC*20ygOT15=MUAosgZIvp>Y<_t_QrcZcqx8mF)+3n}y?A1jy|!6{rd1BbtRG_>HD ze5JNbnnX$vap?oT`)A&(fnS5*Fv)qMfn+Ykw2TwDg8P;0g)J)d&$l$u-O0^}{*$Ax z6s{j;rhoi&CPAXXe2ORS=*gaHfY1fJ&?zYR2+r8kVOwBkM_jQ{3Wa_4R9*S>qnSSP zXOlD5;dJhXm2P5c%nnJQUDG5gnVGZ`UrU?QB9g;BB#xM*%Zk5!VT?U6$ zH6E$sNNI9Lo#OH$2p$jN))>xg4(9=h7)~8VCu)ucqz=zGmrAvCiK6!`|HVr7%?J(? zl8EpRDzbSy?*#J`x#m9JLR5Q2qj1%E{1%(;Yx)wraS5gp5E zV1jMn2lcrj35LhZ7$&Bkkqy}?AbR--zs5O%Rf=H-mN%ySQmo0sq{CG%M6gG>*Ciap zlw6e#FLRAI%&}m~Yw_&HZ270G>dn;(*>H;Tluq=4s8C|?^bks~OIgrOuL~pA9DY?g z*(IQ^ACD8gGheEcC%r@a@4QZ`A$%tp$xo&{0>&kzzg!)tzhAgS$KYIr>s3qLtFtPx zGi!=kqJ$Ak91$cRk)~hNqNgJ?z$~ICRFrtJ?5V;cv>aGg1C_PgcGF!%c5)cqq;0Kb zS7{=0=UN3doDm)MiNzD1w4e}#-Is>|&S0xN{ahWbc?8wLOpreR*A!`H?Z6^i3`mkO z64tPU>5n0YN%IIHmX`{#(Uy8>yBA)6o3_jp1~jU1yl z_svIK@P%n04+vfrhBtmnn5k&g*us5r{&Cy}i&&?_sJ13~(18QkUzrn&AB^bzA9CSp z>bIjyuEd$E%QV6)9;Qt|stdE9h(T`eKP_sguS)ig3*omM*NwX=mtm;Ny{}MS_UK{N z6&9NAe9fNO!DV)O^s%>4eA`>ue3>yGzIYKTxr0q1{&*45KTV!ZIQio-wbAx>&NV%k zX60rQ{%-&l-kyKRi;sMIn|D<_6TAKVL`x=VH&_3T5qR;V#`rTi|7p1XBRDTiL`Pv; z4Hxx$YDLUydw4h$1>M?<_LtNnXDD$)QIIlh=33y!WW?H7jq0R5Wf0zI;83ytUX^qp zUn`KRR?_CfRP{GGMKjiYIvwreXL0!k*IqeZPD=^M7j-*)nNF7^LCY?RGHKlO#jFT1 z{L!U{l1WZcKfm|Ksap&CDxc8_2hsKn!zkP4=M$2AoJ#OkmdhK+6WDL~F}}+zbpyA5 zJgQyPRYU{RSE$hb2=Sy7O{76KB#NJ6f1M>zv|yj*YUHF$L7!3SJ@kqN)L;2tve2Ff zFG3FJAA_$PVHM#MI-(Pd-kLIP;W%AnD_%;(uj8y$g%+6f4w#7^Np~gZ zCx01pqwAdUwEI9EcMV8NKH9MAW6)sDx&q*wdu3+b(djNpNyrE*RAh%d{6=96cz;$f$6jrVymPfml6}f8~cGk50kGMpsd%%7XXku`t}6r0;J0Wpt%z=rHoy^!P_)4lf0{Vc4S<(fQ3w z>W*n+d3%gb|4n#89M^LbAXX$vgGD_x-S+Tw@acHO;~BCnOSK>7aB;$!N$ydvy#TLV z|7!j?un>KX6h6SffqWg*DAkx+AQGLO;rKTizJo8>(a;{5-EjDB*V+OO15XnA<))4n z+so<42gU&VvvibZ;>q=X#a%&^0I(QkMovn$qUpgs2Ao1h&gvIHXCNoWNDY=xue#N_ z&t{~>O_tBseO@Q4<~ocbxa{Q5^&@vkO4z7W2|7B$T zYe{@0nv?s^gX+!2{pwTp|4Hi9ew{>On`xhZzEVi1BKb25!&38M1e*%_=Vdf_<4wu~ z-A-w^7matETT`sdG4PcF#fQ&B@nP^iQGZ(<575~mvc=5Ze*?d>d*(3dZY0z zM}QRhA73-6|1hWSGa4z4Ktr?WL{{Bk4R`;Bh64^ChLoC}v`=_QH2&CZ^U#4#ALjo8 zT{_#QK%(&#wX{qvE!4KgL3%;Q99$>7u~0ehm>RyNiZc2e7<<+X#=QVD6u53znrX!q zsezWLmzWINX_^_R&s|9-X0vBLPUQWEvouQ!Ch84-p+BQLGqmB3|7nx`P4IXz-9vD6 zO2w?$ZU`@ojd8X)J-_{4a^r?7e(1qN^hd-Xp$nYU%DAb8T7DOpL zQ$O0S189D*6t)ZDJ~O|^!4Wd4dd}*AXr8roKsbZ*Z$f{ztSZX_$o^QCq7jWGag?)i zN<=ZsmS&LPS}Q3R`z!Wb?@t8RG~uC2QVsfOPdKy3UC>J>G1zvdqGrkNPh4zEoU4-(>l z(d#&3!E3nzp6jq*Uj*v>1vcN=*;TbGUb@9ausFnk-=%Mx>SKsfZu-_#x>QlfWqLd^ z5iez5VJSFt^?v!_w|ZUyvJ0Eyb(%nXk-hrc<6vHEVwQ9-@?Nll+=~|%K5XtTyFk*; z_P`tAd-_==Y$qyPxo!Y9>0icxv6StmSI?t-q$h%nrq7xg7tGQj{KUaO&to_|aiAQk zYo#D6-u3t7@1qJjk3es)SMIms?02brMXqSAkyN3Gv|{K8t~+@LzS-}m93m^$(2>Zm zpgQoSiU~#pwhNH%YCbn{a~$*}>&UuGQtuR0VUg6wE=1cD49ZS=!-;L?eMw#$G;Mr zAEwu9Q@2S`vcTfH#R#*l_ap;4Pnnaj0y+<2)NJS)X`$*zo=IK%4lK3wAVn#&+Ko?J z1a|6fTI865ZI~?FJd9+eit+n#s+Ee)Cbb*9luDfi7v{Aa(F{qlojf}sv&hOT;p&ye zLb~=Up~bButJ@*7bYwo^)$@{O@*JeypFS+~ylA07COPbYok5#m&gCWx$BQnTZhS#O zq_iQ(EVBJTH2ZH}FWCOIS^rl8^G%ukrDH4O0nz=B5<1MT{%vuOwpRA#B=xcQUg`ff zJDTD2fe>qvQ@BEc?4vswY!ShWY_AEywVja`asuEgRai&y`)~V2ptPlOG0Sk5OoNC# z!6QlVpChaV2wQhmUs;YC5jE62ES26i-PVAwqigS$ol0O==hO(AWQH{meY1=Oku=At zjU%cTT}n5}sQx;9Qj;Jku@d>3*lQLb6|2-!PR5|SC;uw@KoL%NH4q`y4!-CeGp0cb zOR%J&*q>q#OVSd6j4Q{44qd~&Zopn#v%;>^6SXKO*I<7^sr6Ih4ZHO!%|#F%8851P zM{*&gGQ{GbeMHGuL{v+cwmN{+LUy<%ff7SQ)60RQ$i{a{<3uC0q@(?-xytOZiA%Wo zd@q}`c*2Z{7)|AHEvHo4^0f?_;<-uD(4#x%3Z~}8jJ+#R#HiM9p_1?TPYW5^j5g2y zo=;Zw5mjgo3a<&}(`3O~N+Ec=kFOU$D=CCUA|=#`SfbIaTz=$8RMb@&Zh}{d^!2ds zE92~my8h2ks@W(ARUY)lzfCMv^6zCkLg_~oghD7OC1=s*8(e=&B*4gm(*8ATB6u$} zBp|BUA&?P(rRFsnFY`b~A?<Wg|GSXy&_+g$YvOXS0m(9%uM==yWi<* z`URdRV`wm24u&p-x=WNI@s!1%0a0kQ#PB?@pp_k8SC0Cto=zPRhmPAOx%OZCdpRH+n1BGq&cxIV)0bY0gvIBF;+)b?wTv{A!D_0IKh9 z<&ijBpv%Lj(>##RJ5Y{Kv@!`hM|8DkmcD)$_8jt%2~owsAB!2uD@q>K)_%nvW3=?- z-pKT#kT9jAa*HEL=EU{E^VXHktq|p}Dx0L|_+SbjEjX!R5w;-HkW51H$*%E;Y@H36 z;18*eLI`hDgZOekXnV^me=&Mo2Ly}M*>VnJi8F)}vo>>*B72JG5POT+8e9&mkO_%} zqqF6cWqPDy%47S*H9(tUhbV$E^LIKyQ{RyODpl?sKb-b_x{XPySvNF%+`d*e%)G(b zjk=-GFi<`%Iq4@akxyqOy>*z%oqhVe*>}CFVhfQ#Y6ez~4zW(rGvF3Y0o8Kdxr5U$ z4lUEnn#LfKB4aywB+*qd*Ir@^cq8)7Q*o?u>$mx3InSYwpA|(#`YNvmL5u4ma$fG% zr$+596WqS%xn5yFgYS;tEotkaHXGm)xRh2W&(`Muv|E@5JT1g%xBHv4ua3B0S5d4t zxFC=C55J0Li@MJbX}=l;b`Ix&tct9g=R*(2Hoqr&*DZjuE}4qH{a$j_`G=Qrd-&ne z&buC1eYQne$Qna#WQ`+%GdK`88a%cHHq_`~`O!zPuYiej!tkbasZ#V`Y%e&_=vI&B zoBg^awtP{#tE1Wa@j1e8CIpUs2I=cOfAbL*U70(jFPPKg(RtoL1GJltNb`i&nZl6+ z+!6+5_)M-)LJvztv%DXViH!(#m^@v_+>CJ)O1u6AzO^Hxi=pZFfc@V=N~#vuSG1|# zdu?{>AO-8O3{4pS?e;5CkO`1*i_;&-BI+oRgSCvn_8EUMB3u_djiQhV(0`W8Lx}Sy zICOEK)BgMo!Hxh5hne$uKiYn8_y#;%jY8RmSgjDO}v6ET3|ml7={ zLAaD2tr&!J6x-!CV~8O_;q{2B-*~0~ZlitOmmYoAo&eA+St`g#N+z6!6q?lr$m7|N zD*+v8f48SOoRsCTpZYXJIV9y#1!O=7Mnr283PhJv4s!=Zux>~MhJGP&Q|u`idV(vV zfof(hPy%aw7@(TDd4m<>CMak!n97U=D1Or^D)?PE#VWB;#w8S*{dk*3qbpIE3ri7> zW2+X^Kxq+^fk)0!uEMYzGgL40ew)=U%}_@n#a!x3=^6gHtDQhNY}<3lUAmz`Va<^h z2aT<6q1hb6FK;qbh=#cmTic=Vci9O_7vklD+*KsrC!X}yRagQL6JvJa zkZ$ts?yE@j;D~e9+s>vboNY=6+-MQ(*_(H&c(V}=`x5JlBiyjw#D^{=UX~%64ep2x z8j4B+vc;d6#LQA!;h@uuNV;6x939pUT40V7< zFJA9YHSB*H8Vr0sjS-Edqd!1zJ%T^F_JL5nQ&tUNnPLkig*CM9Tnc}>v|*n%4GjI* z*n1r!!K=;&OBk4O#r0qD=#PH1V_4T4NP({6YB{bGw>Of&UW3<5)y%BkJ1bnRv`Tz~ zeROxNeN~;CIY#6&q&#EIR*}Z<-vU3n^S1UXe4rtLfm!hr7HdP%8{p|Xc_*ybkm~r> zjhG_FdZf5DhiConm3Qak_xjykiW?!0Z#fS_QevO9POtzM?PA&t_goJs&k55|~^?$92@ z{S58ZMS$glKW0lcM$yiGXDkuy+@xRZd>d7FV7=f-REvs>IQBte`W(y0QXzagReGwK z(betJ$YUkS>1(7eoQK1xQ)6qI~2Mwc8AU@ zoh#;Gkbo=G$J;fcoQYrcwFzv5N|!@S=s4+3tfWXSIHxhY7!v&4OM;bEef85IhrLnY zL*nE>l;)=GpM)l$D)NrMsd!wEzERr zv?6;xsJrsc_~D^3eIH6di?dd<)K!~z}cB$fesVB~t&w=_!l>$huCA`-dum6GUo0cR0 zKrnpn`{q-Cxsek2bDzDaklq%gW7%GZV}1srmLN z0_C&I?gdZA9zQLNMVAaLch+`(H9r98T9<*pDQJIUiG1XzW`B86J5Zzl_lBLHhnw?% vX}mli|F_1^2f5`GVC3ie|E4%Nc|WrMFO83z7XspvM&ss3r=^urmPY?SuDx>8 delta 9406 zcmZX3Wmr^S)bAkDr6}Fq-61I@-5@QEv~(Rp;E!~7cXz{ppwv(@q_i|h4c&L}z0dt{ z-+7)r>+IG0d|1D|)|s}hw2tGa z4OU+8@g|BO&;p8$WUVeMXS!L%dM-66wFp-0GMgU^x8}VSwDo`JGq&JDz890Km%qQc z*gHOO1by)u_8q5dUn^7<;3Vu4%QJby-UI#$$z^}K*r(ula$g{CIm9*S7ianrdb_>| z5jRq(Ck%*AZWvsv z<>0>VPA&Ngjr`i=Z_)wTlX>A_B0Rb$BlSCg_1*S4HI>6^D)|?m__&r*OTFwI+?{kz z>wzS_(&a3BK*-v37)M)R#Yorlzr%8&JBxMVItJEI^93jNC}nU}s3t^!0NY^TvNeev zpjt5&4htupo$?KEEfzBp9)5~#c3XBjZVV(}otrvr>xKrqd8g>(dgLJv9ymF1uo{tD zycjNFp6u<*Ut0OBe%s|x`lo(*#o=}Kipoz*-=6Nab05aiKXl70F6tfK#hR!7J^o;s zy(?Ump9d~-bNM^5nx`}?EAD9TOy<;M0rP1mVQK%(*OWyY`JZwXa6~enI7~zX&Ngsr zIRGXa*iX+VU+=r+r*81t(<-?8{wQ+Lr0eD<@OX4_0Cf939FSPg_P>7lcWbCv@sBQW zmf7pXO4A!^Wzer;%bUbyL%O6q&YKL)Mr-y}mERonUEo{rUMj&AlrKC2$}V zdOYvUTl79@gDOBl&iA&JWhw)yt&mqWAfH?~$y6rSdWylJx^uPYoylNcAWlk85$+5ggxw4TRV&i>Vg zv}ViU({(0t@AYA8gl50@2831$TRC56y2VdM$PpLc6_LbX`Hzr{33GnqUcgxqF2Y_Q zao$z$Q0+MJ;oXY1B5P3H`oSG>*qdzgr9(Bte5thSSC#awF1~e&W_nldbkI?)%9Z4S z#-a3z?QH!Q*=_T|+Z17D)hpR6g+){x??Qc)+jiGst=V79rP)*dlR%L61=TU{eq1F~ zIxbo*CREQtior;uGv*-=$Kftce7p1go;7>tS0OfcQ&Q~GyP{&D*G&+3enq*EfzSxL zAC~Qh&jF3faJ);OeY>a^M+sYy+IhgUV*#te1BRbB8k&3~%@kr+AE1(hyKrAuxe*jd6NYcQ|`ibP`l$bmey$ zt*pjrxkx>(?_Q>Ap&NB=HkhAoN|z~#%eAisNO~*&BtVj_;`BUt(I%QJ3q|SFPAPG8>pq}Yc4B|a zf}#OG8i=JyOtmMcKDn@} z?g88Bu$MYP%*k?dbFbttk*bnb8FKYpkSm9}pMLQP(`lgbe22R{*`gTFd85Zy)hq|h z-Q+3#CaBX0pzss*Y3lD~a9$Db(D_f%+n|?uzQg&^!?{y1D7lM6)v|;^3Mgs`OorWU zGQ4xEHl?#9K2TG}iBzv&|LD{^qB(NmxM+OmTO4~}9o@OUDe17`W@>dX>9 zUJ#s_Ik(N*Qc!9Jfoz#(W+iI^A0hADM`Hq3|6II!_tCpYqXVboefA#vaT9o0)AvuA zzQK=rjO4|&7B)Tu3>5J_vD*@gu!ef%f26-l>%D^Fe)55h^&VI^EB^P+U~-}anVfG)~d$)WUyb7zo@ zKK*Ore6sR(n~^T79^jBvE|40AefgVN(~*Ph(z6QS-n*O^m%Wp=xg!frBcM ze}H9jd*Ph((HO(UBfirye^&WwJ!EOc$zxM=(bDl)aBWn?BYn{qoBqJ_FL$e_(f8BK z-|S!H=9I|i*~tW zFQCyyHs_MS%=R+c|4vGT=0Ktsn z-v~}JEqCoeXY@_H)-=|+(qO1bO!5aaAzm!X2|;p;_HN|Ry_cY|z_;(rU5^!)BsA%i zPz>uDmuz5G5-WKgeYX3iWH5HB)2!dvhyW}K@%@T`w$b`uTF+!wU1CiUv&;n9orm7& z<9K1Kra~C3Ok%}3Fwi(?meeM#+--4LRzqtsJ~l6fa98+CzHHut=6ww5$rLqJYggGw z4L;@&$@z8LX~W>RE=4Whi58MLYa4wxy#b%a@ zCqgze8OuC*?D^@b^nEzy_?(_8J?7b*e zL2b8I3;dkt@LE5pJj2g^%zH2t3MA50#OUlwx1z(rXcF3}lIWS?E-x;Pr{gRjM@AUF z@0QD}UGfhRCnv($vCD)xbL(j%%RRDIl<_uUDvj9ZU$$WR9WQ0cSk<55lVK@IfY^PK znfbxE-6P+;IDTqZY**I7nBrM?Z>L0&vs|4^O2*7B_dNeHo(cezd==sY!kCpzYb5b> zxOFpr`A@>UF6$v8W5SaY73d+#2D^VMI4Y1)#;6Yeot?$BzaZ3hap?_ag&Cr`#GlZTSbl%Rb^h%E>d!Yp ze$^P#)>OG@&5CTg9{`d=m_@+2QHf|E2}C0AVZpPcRxf)f5nj!5+sbM~GEUx`3Y4Kd zPHFu!LJT7Gpkvg<>n$@JA zdmm%{F~eU3$AU3z4sbwen7I6<37EYew&kn|UYMXtAJ;|HV8YZ8-{~WeY#PY0RS5(E z`x54v=SY1LDE>um)AiM$UVkj7eic&!y#mRdFH|XoSQms*fk$@lKukdvx2bJ=;wd8w z@TVf6aBf`gotz z6AP}+mnIHE#pDbIF;K@&mzy02p+c+1Ez`d`2IF1{3$9sDr!8630=2OXTV6;M4O@Q$ z4|V_tp-$GH)-j9}4v^E5w&HHCLar(x-@i7c|NPjqqQQ zKTw;wB2)=JH>Scg6vtu+%Fj>V&V!*E=Ao%^=_0Ir#@B0ywD4 zTDO<&v(CGs6DcBp4dm8tE}t15c@q<#S!bc@+c!KTa(%AJjrTiE^?!u`Y!XFfpsN!6 z+5Bc3g|ETyY1Qe`+3YhAWaPwGvou*wVQKgsoXK6Q=VSU=)Kb#GT!lkq{LQ`}B34DO z>#U-^OGJg3qrMYMpk>Zab?VIb1wtert$ZGI-r;Uc5r-gL)yyr~yT9!?-RdRbrTa@w z@ZUh-mc+z*o=VosBg9rLQ3AU-MIhOcIbPR{Cu-WfG=(vPzQ)1L&qA@8jExpBXd@X> zdDFZ#94(549&XgUUWF*hW^?mhQ6$Kqwi*V>L4y!kOB%OU#~5A6YoK&lS?Z#8p$_an z4lBP4RUZs9cB+Q$m$up=Id(8e1Y;|QcPlE@(1`yAsT+;P zwPMs*CA2IE6tZ%`FSL2(-C6Z(I>G1oRR>I^){TM}lM2N)8D5$Mr!35Lg0K0cEMkKOqv1`I^MX5bRex4s34mr=5J!)C*~pg>ijR`b#CdH$$=u|bbX z{uJo|qQ$TPei{Bn^~wsc=gcwI?W9Bo6)@VHLI1FHE_5F5#EmT^40oWEx(8D!jFG-2 z>R@^)kVh6Ch3KXZ2UnUL#0mH3X3Qg^ch%Gqze9td7~V)#Z%~<(1{?%{I#=5oq{ZLb z5DH^=>H?1Y%KM&Ba|?6@Y}3gyh^d|-H3I7?!7a|KUacz<`oLd?ojk&Ups=~1SUYY| zWm@Wg7non$xN|;j6TC>1U7JJ+0_il9A-l|q1DnRanLMoLP$HkTZ3o; zMDdQu@?PwPC`LEy-2d!*>@ne-n&8-SIbsJgM7T8yg>39EY$Vza>PE5>iE?f8um9td zL!u(^$8hf>C!Qi7V$`$8&0{FDQ{a0<%tf;ggie+JSk@JM)FEx;U7Z<}pBwkD%lW!0PKGxwC$TOx4Q;GBn7 zW|?v*KJ!pa@Q4{e8zs$edcQf*mn(sQXPlwSJJ=)9{=8>lcpCjzXyaSz7x?lWZ=3ok zLZqAFw zlzi#U>n+|zdK>0U*eep_PY6xKj8U5pY1_FX9;FYOlC+KqN{6<@#L`MXEENqw!&YH(Un!C$=&e1SA^j|1V92|XW*EP!VCiNKu5Wv{E0WF>oxa8U{$u@dO&k;5$`F*`LEV(Op;NC6P$X` zBo;s0i7d{SpC4+DB}7*jUy0xr+t$FX7I!j=iQz*38vPRZwu#&!N2u8^sKI9ck$%w5Nb@ zy-WFyf0^4ZVhjc0ldBrZOBa#t(C7eP^)c+;k6}G0c_k=9R*nldisb1A*gAU)Cb$oJ z>6>5OEsQ4$+PhxImA6WD+BTw!BwGzto34`&Em0GOniN<&GG%_yb4-;U6sO3b#!NSm znh|E>i0p!_oTo;&L5yh}h%~9cRGa{*51-NvnvASazNO=AYKneQoc8dZO_o|5wz1XP zQI3hQ1k{_+a{DZ-qID=f>@E(3aEuG5u}hM1NFE=1J%Yu@2uoOwhchaE4v7ufiJj9M zmI@SGIet$g=NU_j_m2zT4s|u7vEEKeD|bKRHJ81V)$*BjF*x-{f z3ZGL7m@G%SK>?}x+VxTT$Ec&k73~PF&1hrp*0nL|4BG)X$8rC)$mZgCT=OVf)}w6+ z5s#0B<98-Ub29ql^MrP!3p-?@vFT6eLrb`dTA`5;k!}k|$@EuN8C->O+xVio(7N<4 zC_HXc_PR()`On?>b|gzepk@PRiMCol%El-inQ3Xq1@|eL-cUqD0(TioqMZqk41e)z zVL7Ah&ryC3*uiDyJ8ls=;ls}nzJK96_-6IbZvCziP+Hle`+_l;KL$T*EZk}T1so*c z2jhGaYWcrG9ju?)x}c@J<^qy47(hal3_n$Q|E*>1WNm2Q?F=H0BC$f3O`gp;KK31d z(<6$bx>>B9yt?lDgvG#$tpCWGWv3Kw5P2 zlTt@&$svygsr^wRC?71snFo|K2>PlP)QN>Se&Xt%rf+kpFPU# z6b$U#o6PU0Fe@6wn!#@@ZyssQxwv>|5~F@L(d1c4B*Uj+8Pbnmne7W~>52MT(Rqf` zT<+Gw_BLyF`@NfDY0U|>OMqjIGHyqTC5QbREFa3g>Y*mQvLI(0HA{E{u*lm%a&D?& zHqsW1h*(I(iC6JlR2lQw)k1d$=!W%9@<1WX@Twha=Vp|LKB4SDPlSI+ z=VNHO7}@g-6EeKd{O8Xl2nZn;a(9Af6KUs_rB)1&0o*n4v7DbUrdua)UFv83(^F53 zV^a&IKO?owAU;PKT>a-Ttx{6AtnV(2GcjtRf%KoARjeL7tnHu@_rOx8yHxV2Tml#S zMP*dl68Q$4=b{fc^oOO=e3iu6FE$$GmD`Qgpw}V-Xo=1azl2tBsVaB!0^8-1GijwE zJ)=+@_)Gd|6EnQic+KZ~9LMRCJSmWgd!{U|+*ePar^kAg zvR&Y%-%+1OL~N4ch-n>iCr-Q(8ha94JD|5tctzmHKY($38gV?JH*{3jsP}f2bIS1a zU7u&gEzW=s4mcq(w(W>l8Bdno4{M*qFY=VckG?*q1e1n9tV;qPU`a^8=xLdm{Z6HQ$)Ap8Ewl8tHTPdy^;i8@6~KM&Ttlu2_iIqKlruHiv0ej<}1{QV&zMP>9@MzEx_P2GWwS!ZQfGn z1WJ}(@IvJwh=CYP0N8mc;ONhm_Ev6m@;#huB@|4QaD}U~-0zkSbOC3#)R2?NT*Djq zw*}8Hm;k%O!}39{QJ!`xbe>?f%ll5Z4>&u^tqFLT0=h-Cr%PvRp=1s3@=XRaWqqN2 z0{*@?+hY`)Y()>i zm3#(;t2|?smb+(Bvbbwob0kfPOI8&e)aF)JU6^B&&ZaPa&V{u)zSuyQ&y!ZYn4n0w z%-~Y&SZ?oBe&KC#2(t8LH!yrcG z79IZT)x)D=>H1CZ(G0ufgQU@lt+IUYZYp5~J>V_Thrno5ps#ub$~3`u-M+ zey$Ex;m^1sRJqm~A-<^_imvON_Q?E}B`^D3zqS-u>W-NS4TSVe!uq!6r`edIwg{)y zBe0i;>OmmKeJoo@{T|0P47IV8BAss3r@zZ;DyDAXQzDVBcIJA!3Ad-)sF3W0snDNk zV6p*lL4c^2(BM(gcLv@6jC^9@p)^0s)TFV z3|I>B-%)-$JAk>oF)3aw)77QV8eE&TLsj)Zt_N z!VcqF2$#KGLPMerDO$C?5OWU>m;va88wJ$Ee&N(IO5;Uv$ZcJHU-dXVvlK#p80ame zaQ;=A0g+BMKXH16`N+TQ;@}-hV<#Gs?c*Z3**WjZ3%@-&&|}@1I^yW&>wSya(PGpY z(Pq@y7sC5yx$2DfjcyVCmw1yjz2S}b4Xp$Hr-nxx`cFG|yl-E`KZbtV0|bq$q)uTJ zYWZIK-*76`SZTL;D%3zMU?gPG3XMrmy=ireET(YNk5xXr`vE zl&&y)it|PMd^%q`VmcibRb!hN#CS`a!m%MER;F`lsZwTqEn{4!b8YDee^LdlCP)7y3wbjZQN4)|h`gT?Il{#Lr)bm5i!>}!_Jv+yFO zv^08sc~xZf>j}7CYmOzu$WYG<4cg_jBLjQsXX0=4aTPtlMSwOK>&t<=Ne8nin`el?B`9sXBNsXDx z@1X~yJ|FXwKcLD^N_cmjm|0L*s9V1$;CDq^!j$*NwcbX6_vdV;ZbP*QMd|A+Y4qn9 zpXn-civxf)r%6j_uW_oU4_ZeNjctxTBK)SOQh`JWvkh3UTED=y*q!qrCCN66f{%4m z6rfws1byfW`hJ}Y%|Lug+jG=3OscYtuMiP`ywj)jxALvWD#U-qJxXy0kF>n5HU|^h zMO-orEJQpQ7E(ngxIGw%di}4e3+*^N2F&M=+<;y+I}`JPuEf`3!C_VAQ@!6YMhf{o zN=wi)x)zR_kS?$mw~yR7i{wM@`9v^z-BUoXzh9)uGWksLzsnfh%oUX7CsgVpJf|5# z%^t)?nK>1qkbG3z?XtCb#~olVH*@KwqQ2t@?VJ}NWpP6B;W+M0O!hp|SP{sw=?o#i zJ_X7TI{vEs;>Xx<{30`owPWht{pu|aGDYW`i?(rP#UZexpmSXWk5so~OZM(j(bDT& z@bQ^%P|JE#h03r;ew=%+(ol;CQ!9>RIt9k%a!JK%u8Bk+Dq8EZ!VOK8=(MoAD|oT! z)B2$gOVCD6XyCz766;I96X+E7q605oA1Z(ro5TIxlR-6|^V;@kx@FOnoBJVX*^$$z zys&9`7(N^AFMvNFze#gc_CO4FFJkRXJe3m^=<1HaIe@aVE?(H;CqHC~jeTf`uXm-Y zje(!%O7;Dk51?72rGiI07q;}fx4e;49{w3K`?tJ10Zn3K)O99Rs%NlcXlXa{EnpRM zyxAyAmyIRHB}mEn{G)WFMF01|Da6mo^}l!m+`Rvr$0v|T#Usqj$Nm3_xVQy4h5i>$ Wkdr?#lt&hgQxKhwPDWK0{r>;}o`;YC diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h index e24adb19..9ca2bff5 100644 --- a/include/internal/database/xgemm.h +++ b/include/internal/database/xgemm.h @@ -18,11 +18,11 @@ const Database::DatabaseEntry Database::XgemmSingle = { "Xgemm", Precision::kSingle, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } }, + { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",8} } }, { "Hawaii", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",2} } }, { "Pitcairn", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, { "Tahiti", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",1} } }, - { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, + { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, } }, { // ARM GPUs diff --git a/scripts/database/database.py b/scripts/database/database.py index d14e36cc..8e8f37f8 100644 --- a/scripts/database/database.py +++ b/scripts/database/database.py @@ -92,6 +92,7 @@ def ConcatenateData(df1, df2): def RemoveDuplicates(df): return df.drop_duplicates() +# database = database[(database["device"] != "AMD Radeon R9 M370X Compute Engine") | (database["kernel_family"] != "xgemm") | (database["precision"] != "32")] def RemoveEntriesByDevice(df, devicename): return df[df["device"] != devicename]