From 9a76fdc1d23ef581ae9fcd5b384d00a83a1958c2 Mon Sep 17 00:00:00 2001 From: shay-li77 Date: Wed, 17 Dec 2025 16:30:17 +0800 Subject: [PATCH 1/4] temp impl --- hsa/gfx950/fmha_v3_fwd/codegen.py | 16 +++++++++++++++- hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16.co | Bin 0 -> 40824 bytes .../fmha_v3_fwd/fwd_hd192x128_bf16_causal.co | Bin 0 -> 46520 bytes .../fwd_hd192x128_bf16_causal_group.co | Bin 0 -> 46640 bytes .../fmha_v3_fwd/fwd_hd192x128_bf16_group.co | Bin 0 -> 40952 bytes 5 files changed, 15 insertions(+), 1 deletion(-) create mode 100755 hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16.co create mode 100755 hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_causal.co create mode 100755 hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_causal_group.co create mode 100755 hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_group.co diff --git a/hsa/gfx950/fmha_v3_fwd/codegen.py b/hsa/gfx950/fmha_v3_fwd/codegen.py index 2563390b5f..c221410cc4 100644 --- a/hsa/gfx950/fmha_v3_fwd/codegen.py +++ b/hsa/gfx950/fmha_v3_fwd/codegen.py @@ -122,6 +122,12 @@ class fmha_fwd_v3_kernel int gdx = ((fmha_v3_traits.s + fmha_v3_traits.ts_qo - 1) / fmha_v3_traits.ts_qo + tg_div - 1) / tg_div; int gdy = fmha_v3_traits.h; int gdz = fmha_v3_traits.b; + if (fmha_v3_traits.d == 192) + { + gdx = fmha_v3_traits.h; + gdy = (fmha_v3_traits.s + fmha_v3_traits.ts_qo - 1) / fmha_v3_traits.ts_qo; //do not merge the head and tail in seqlen_q direction + gdz = fmha_v3_traits.b; + } HIP_CALL(hipModuleLaunchKernel(kernel_func, gdx, @@ -146,7 +152,7 @@ class fmha_fwd_v3_kernel &arg_size, HIP_LAUNCH_PARAM_END}; - int tg_div = (fmha_v3_traits.mask != 0) ? 2 : 1; + int tg_div = (fmha_v3_traits.mask != 0 && fmha_v3_traits.d != 192) ? 2 : 1; int bdx = (fmha_v3_traits.d == 192) ? 256 : 512; int gdx = fmha_v3_traits.h; @@ -182,6 +188,10 @@ class fmha_fwd_v3_kernel { tune_opt -= 2; } + if (a.hdim_q == 192 && a.hdim_v == 128) + { + tune_opt = 0; + } fmha_fwd_v3_args args; args.ptr_o = a.o_ptr; @@ -245,6 +255,10 @@ class fmha_fwd_v3_kernel { tune_opt -= 2; } + if (a.hdim_q == 192 && a.hdim_v == 128) + { + tune_opt = 0; + } fmha_fwd_v3_args args; args.ptr_o = a.o_ptr; diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16.co new file mode 100755 index 0000000000000000000000000000000000000000..82ae7f0fddf05bec08f0541bc75503abde202795 GIT binary patch literal 40824 zcmeI53wRV&n)j=^5-xHHG|m{*U?R7m5E2Lw5TPps>>wf_A}E?PxzLf2gd|+W?k0qb z$Uuy!h=>_vltI?X2q-EGEfWM4ToRobZ@a_AapUaFI`hr!T)y3Jvj2abQ*gRX=v;Qc z=b2slc}{*^?|aT$r%s)&ob%ROr~H$rO>yYDD@qd|Lp!JG@;l*@-~ara_;PnNWocR$ z{OzfA*E&JWvM!CM6|fHOvOA8B&7kCPPkW}PZocH}% zFDDh}=a=P`Yd=UTD_E7+<&~smp2DShS&Isaa{o?b&04TDKR>T@ohb5Ri6_@C)TNn1 zdl#(CvP{^P%2$@;y;v}EM0gc{XjQSesBq;KYdF*1S+8bx7O{r0%@q2tuHl__4da{H zS;QJ9HdE-ox`uP?8YVZhvxqfJYo<`EYuL(;_~NvCCfk*CHM75nm2@{#sKrXkuKZG2 zj;GL5YVUW3FcD>UT>b`_fHF8beXyej4tLbRk#-n(X;GGXJv<$3hmDY~P}fldr#fojbUO^}S(K$* z63({6LP#Tg+))Fcbkx8X?J$5wP%aAR+F>E25x(rGfyRy+_;ouByu7$uu=0F zy;nO7seR&%er<;>#CF)ijqR}T%92G{UOF3c3l?AQ9`Wt46xtpGI&2}a!xo0N!@|C0 z%```HI}C+3M{0*Hq;=TB_;y&>w?s5Y(Nd*%xY}VTv^g?5Y{A`O3)9+R;iY9EqpLIe zjCL3aZHn0)wlKHD7Us9Z!k%Ty24UyYn3*HV}C~SuXG{ogm))~E|9VS8> zqP)WvR&?0H>ULOosWj}z5om{z(59&Du!W5swy?P!7WR~e{5Y!GVIs64wsqLT_6}Rv z*$xY62=&KN(+(4%4Y8-g7WQ`7!v3~cXy1L4y(Q(PSxYcZy*wtX6?<*lEuJ&_;bw}p z=oarlWfz51HrUKwBi;l!qn~P~Sc{dlZv$Xr6Y^_2D~z~hun<>^}rQb|i zSX#Wa1XIX`i}Q-gv+@g8DrTBl8z~Jz4q7bF;j;DPu>hNEwlq zwIDxb^yIgbN=gfsdCK!H?e}uUw1Y{dc}s9RImG^x*nzIUl2o>^q%M9yE)~Y1{3vO;XIWl;X)&6GN9VHdcQ4B;Eh{K4syS>1m(-CyBBXr09Ko^)zJy=3L7@)of_D4Nf!|gtu zg0)}(404LItEw6=RU^_JVuS=)V^`dZsK3REp{taJA| zf1vZJH5cxv^K-kY?nJl0Hu>+q5L4&BsI6;o*BRPHZGFRqlZJNTl!Y^ zphoY0uxL%)dj*}}f9BE7@1I;u8yDU?q5tya2l|@2la`#vc>&u7_{-?&J~E&kJ{r*8 zJlZsDTy+3V+mw8p#i(1{)s7*V)$UApz}=-wAkejIMbs$BQl;$LYXgDre5c~hq3-G^ zH{?SztGQ3)U*nFA4Fvl3t%w@#t{$8u^LOta2=wSt5j6s`6e;V~D-h`2yCR?+MY|qD zJzzmQj_h{7p^%rhwzL|3b9OcQJZ`@4{r!reB{p(e3LKA6;kMI;ncCR+oTw z8Jzg;$sTC8gv3+t^{7GGpw$hotUEC<8vUUH$2G}~%n&#I2E8N1$o->Og!i=z8|_TF2&ap^oWD8Awx* z+(=WArXfv7nt?P6X*SXvq`648NG5x%O?T zeJF44_$Y6#ew6=r+8-3?cI=Dm*ByHSeT{vv34M_LgZ;1xeUSZW9r|Zg-H9F-PDXpb zjE?f++U8~dS{vJ-3hDD zbAK$q2HTMJz_q4FFYNP%6JxwLoE+oDZzHT;ClbQ*1V)ecHjPg62I8@=v~dtmg8hppZntc2zJFyPw;Xq!ryEK@~nIJ ztNsHoo;X<`e$OWcUp#T5U>NLg(0UH4?Add`1^i~$u+4ZquW(ndjh?!G>YB{0 znZq)xGdCjf_l@6Y74`UCH}w>Kzp0n#t4+@DT79A|=>mHhl zYgPYho`58lY~U_X{eGsLqzcr(j`16Up$#PVPw%Y(PDJUEo)!K9K)$MM(CZ$MaVyk;@g zC76~iI9>y9#BhdsFoSx~O+7e``Z!ISK|MH|dT=iF;C#(t+0Bj@%ykKRvjq#Ohn*7Y z!E)-s71V>PsfV2a^U%Io)avj9(GEo2g|7kS5ObGrXF?z)Pt4OgBz&_ zFZC-Ij!*4Z+)nN4h_m|@x6^t%;_QCK?eTp>^{xb=XTPF8V@Rmpog(z?SJY1%8>*jS z3O)N3^|LcW^>b$mJ^K~)^XFaG&%ieORexR^agAg>`)E;PDbMYiZXE|M_8Panz05`-bXE5`_LcQeQqKRKFrc=)WuVtH*}w17><0_fwfExN)Z7=6UIH?DqrY_4_)M zXZx~VtZ!`CcHgk=>y_=>#*SSx!HhZeqt!S6zR8vQzI*-XZiGrT#H+0OgrZ9e#%T^LGmUCaHfM97OqD zPJJ+Wx6oHf{S#m!#^PD=|D)r|V3jJQG{~?%4d4W?O zMJ^KheNz7lIGXasPJIknEcCTf|6?$X@+D4vELkS>uSxv@a2(}Jo%(I$a-lyg^>2dX zDPQT-ZzmrS`nRP1ZEynRk2>{oU-Q){Gul9o(lz-o;&m>ZOUhhoae23tGI|T>bEtq(Z z;4SwF4t+o{DN8UpM{vYK!PG^9ql*R8$^^$P7aaeH-~_**Ypr1V2EmNS1*dEkbU!IL z^%=ow&k0U{L2$;4g0o&0oc)U6oF56!Js^1h8-nxS7JTR!Xnf(v)z!!Jn?qmt|Mm0F zucN==H%5a${I8aNgl3yuTFfwzITf#bpP;O*e;-~@02I1!u(xw^!Rg@L;N9R1a0WON zoC(eXXMy*C_kgp(+2Fn4z2F>h4tO7UA2=7B3(f=Qf%k*=gAafYfb+rm;Dg|U;6va; zU>2AK#+xpm@6ud>em*g-<9PoDP5YahFIZzBTmu5y?Qcw&IAP803A7oDR z4K}A+a?DpO8*!ReOB>)p+5ivJ26!aVoZ(~rF_#IclR5_;V$QPiV15(FaWal2$Z;}Q z$8j=visNMP^eyIWD=+3gR-HI!xYeBF8*0wAD7ne9t47x+e)xt0ub zsko75zAx4E`bL=#TQbb2;zpZ=zA5mWSCRMjWtVsE=$~-R3Ue470|PVa^pd z)7Ip%&V@Alq~es({#=QsBDedeoHo}GPRueVh$P3vQET%fPT@qrsGPQdlY%3G6w zyy*X&2hB5I^WASAwB&n+IQqFnom?(ar}UtxQ$|qKY03lU>sH>qDWP@ZoXM`!eDkoC zXa88(6LoUCM4hGvMV+PvMV+QUXuf6Ttu;lRIJbJpJmSkTk6QABnWCL;;?xLr0%x(E zz}ajkaL!lt_1zN8j}5l#;hCZJ;#|zG*UdlusnH*a`HsvJ?S=Al*cUGz94uq+5}wuj`s_wH?Z3v+ckIY&$TQZ3pHLMc!dD|Eb>T z{gUu~I8riF3epIqkw~<6x-aaZ?NH9kwgVq#+ku5_JFqAfc}L0oXA{!<-=Sx0B{#>-*mznAPeuc5xSEkwR zt4Wi({fc9?pZ=rOwv98>`~7Quz44Xtp+6b*cdd13DPZ~r2j*|yhK$D@xGnB&n6lNO zaemGJeW}=XKk3j$fm5GxV4mh}nD(4Q8v{;%!GU?1w_(PM4s9$r>t&P!XTRdmZUg81 z$bos6w_)x9hju%7{~Hd>v%C%S-*#vd!H14HwB4>BfArO4BX$p&X~e!n-ett@C1)D3 z`^bBZ*!|=@BlcDDK_j-7^cb7_o1WOO4ng*Zs zyhWXNE};B`Q_mq!3cWh-%%%K2r=CZCAoS|IGoSJ^PJJQy4??fbJH3?GJM{wcBcWI4 zoexv~Q>VU&{JGGp^UgxbKXvN3Nn3yZbD>w~okf)Yqf;*?e6vx^4~f2mE`Y* zUY&QYqWs^S`Xl6@gkGI@uBQAiPW@5x--TYCcls#*e@@*`Hnr@32vD#0(edM~KX2Ud zEk9HR<&R|Gul9rWl=tYPZy;lYUhM}PDev7!e~j!S^lCrY zMEQ66=#P`v3BB46HdEfWkN!RK2BBB`!4}Hn`siE9n}lBN2UV0O^wFOn`wP9=4{$@i z{`|l``jg~fp;!CCHp++e(Vrr36?(NFJWctqKKe7{aG_WG!FI}1`smM+BZXe=2RkSq z)klAh93%86pPh?X%@EyrdxcP{*3&85aqldiE?Q(D|&J) zV*69+-?g`y|KXpD{Dq&3{Nv|gKlfVpwrlrU+!1)t;?6*p#a%2{^b1k0=0vYZCbb#v4|>rp?2C#PRHPiR0N96vwkaD30e#v^_!yq8e|c?sl8z$IWQSPGVbWitPs z0^}F>RAG74=@pGUQu^e9OV*GXMUS*bnZl z-hlZ;yx_O)*{(qT70AC5TnVxt)_NQcfUCjP;G^K9pbzwce$WrH+`$6$ zU+f3%*Wx|k0P+Wre+{@smOH$}5mSNf3T&?h*MgN`CAbb;2d)R#gB!pNAoq7E z!uAv36JRx14L%7z32p1wNqWrF*nS$@&w$T>+rjPNv*5Gf4sZwf9QYi_ zdtW>8eJ8#@4?Yj>0(XHgfG>dAW)Sn+%f2s;Te-i}EAboc8S4Lvhg|i4dCcR!1!kR( za^71Hz^8$9&hy%lq#3TsYkSm1~}|^6*|f?V%sE=>N(y-?#GG_vz{X$~RB@ z7MdSga^A0}4V>E+{a;@5td*Dd^3^)2{;vYF!OFw?`vLfraEvYbzaBO}w({Ed{^|c( zWd6)oXntbJ83RBYIIb4`UyIGpth|g5Q0t`nzlzK+tUQbvpgkO8i~g@-^PhalI7jq< zm6+#zOUw(tQu9ko##n+f^DAGu+2~tp{>qXuo?w~zYu|G7Uwtdg-&!)p6s$D=k8hRv z2j3&+A1xW<3Ratc_C0F;U!TwX4@<__0>AlJU%=G-YfJ~`p>KS|_<{;E%D>j^KtTV6iuQ$8Iljb15>i=?~4yylao0;f;%Dlx-Is9K}I%^EmqW|k@ zbEuUU{;v>!B;!Wx^Ye_EWaWYXD=w1%YrC0j<%R#t@{!X2^{hF>Gzx2(I|OC)%E6(>`36H<4sN;0&{lv|{S9p;SUuR#R)O1YHMB>- zr-vH|Nb@#qA8BZhf;+|-7+8B7cHU-aesI@BV^L+Qhd}|)Uoi>$o{YJII3^?zs`%_3H@J@VytSg&uw2LKm0i{|b-kVxO!zFMccZe?^M%V*Wi}Bfs!- zwdnr}j|F47y}t|nt*HL5(eR0=v0}`>|1a221pilfOd0!O?RiH`TVu;u?%>yv{a@iR zXe@X5caE60#-g#@k-tRte}%`aokh9e`Sh5!#;&nk-Ph?cZH-}Lxl_MOkC}t>)0S8^ z`oCHe&!+mnxUbWHNsn2B*ss7JE1He}isa{FTv#^jFeZ%Gf5wV2PE3veisai;%^Ea+M%zEabjxxS0vw-8V9Du{g6gSpf{1@Yg`r#h_mEyV7xUgCr597c% z*0cR)YMdC4r@8nq#uH&bSB&RsE)GmRo^ZcebMasFH)5=G#W*fCJ}ijiVf+{CeWf@t z>YI!I;(5aUT8!mlIW<0v`(q3k0*2s?J)i;Qk+<;;=dyJv%=%T_*=2!z|gnoHw%vwYbpMVex@t=vs#J+ zQ~Q?cH)}EeD}p~OJT8pC6)O%5$3wqac$`>E@n4brSuMqZsmG)G&0328iWJ8c9v{ZO z%lNNIabhjSe{p|tSBT>Zj}PPi82=S1PV9ec{8uol`S`C${x8Ob#iMN*6UOT}{a=g| zQ{%rP`M=aSF!sCV*6vZ`zasg+7#GGmF(!<4qW_C=Vru+XB>$Hh2d36Z^?#}HUy=M@ zj0A^na=GUy=M@;c;MU-BkZqc>Gr+|Cb#fHUo8JJXm;~Sa|$bB>$HkA4YqO z2MdoAyBhx`{9jk&zploAh5Ntwo4FeQb#?uv3H19sdUgG!>1>DRudct8#dg5@OU=fA zwbK8^xUd>rU#vJVj6-by*Ok_9Vt&SdwZ{L|TpSq3C@uNFnv4IUZzYodON|TToSt!D z9P8QsuPdz|#r%x_YK{M^xi~QOc*6Z(&BcGU#{Z?phXqkT#(&Wt)s}UuSdQ^ut?_@U z@nI~-7%=WH(mGdXkvALhU#<3kAs(zn{}^WuwZNF4 z@n4brUoFLfsmG)Gzgmj_isb(aj}K$tW&GE897kK$AY(bke?{_tg~x}n9OJ(-u)nsf zS;pVZ|Md7TT@&$NwA@4JE>Z(owQA^ zo%FQy7i^bnA?=WBA?=iFA?=d${~Ec@(QdiU(H^a}uBqSxe_LN+>s6KnBK69u( zYp6bBs6Jb$K2xYZOQ=3Ws6IQWJ~L=P7f62({W$c~*w5wB-$FkM{Ur8takjt1_G8$7 z3fuqClHbAhC$!{8XzBT}D?K;X*5}3A`kdI`!t-IxJr~y6=fSQ%2ZnWvp8D@u2Z?L4 zb1YGXXFJGkcy5E-j&V4-6VF%he{&6hrKf?zG4KOhfcJV_qL z_>eq;@!nZ5h_M`5hjAKt3S%hi84l{*Ojl zSMch3g8v`>kDBtjohh%|Gv#%Ap1f|G^16*Us#;uc_s*2p?RoOLZOZF*ro3*?l-KQf z^16+fAnUq~cpvM!JyTw{5w~;o|ELw?^kK;VQI9+y1@e3>k>_KjJRcr;J__XdSR&8I zN_jp!@_ZD?^RYypkCpO#c;xvgkmqBGJRd95V-op}y~MV?V{a1TF!}zy3o|usl6=Qr zBHpo=*vvcjC=Y$d-g@gDdy_^^s#f21#rJ$w;XPk`=hIfaYilIlSH<@|Mf2T*HTFB_ zx_J$}=PQ8u^b1;WusirpK{Rw^Mev=07qsa5uX|wO=1K4R-CFwDMjdY!#aN|r@>#z{ z_>3F%1pOQMD=tpl*m-0X%3jmm+lV#lYq0KSqiMu)z0pRw-e{v-Z?sXaH`*xI8*RK- ztT);y*BfobdZRU1uc2m=p?x3ReFxTU0QcN!XfJ{<-OVLBG~ez+F8_;%(*1iO+k20p z{Se%DpP}so_dj50uYj*+iM;y`&G5f+XqJEP0hj-W2h#of4rKUWIWPt9d0Txhx+Xe6 zcG?>q;5w&`a-Gvgxz1^$T<5e=u5;Qb*EwxmD%Lq|lxVkZym_>zkMLz?_#}cqY;OLdLNDsaDCNAxxQ+nTwk?OuCLlC*H>+n>#H`( zc}1gKU$qhIs|K;p;95g_2Ry#P(2j$3j~jR$oVVfKt)ec+4+Z=`IaJ|41=*=54edSf z{bvmAeem>ihV}vY;R_<~`-iIh9~`RopE?lmzjvU*|NenW{|5)w`va_ZFgieXIu#w@ zx~`3KUDrmru4|)Q*R@fu>)I&Sb!|K()^%-^>$)~#UDvbN=h+txtsZQ6+0Yumk6$sg zkHDY(NYtg_P>uhiL%aQ-K=#Q2L;Do`>3r(+`TXNPM2pC5Y7|H*+G z|EC9b`#(Fd$N%|(m;6E2`z+d-_5LI}5Etlltdq9xm|L52_M4v=+Bdv^@{9hu)~n>) zxo+^4aZ7SxMY^jeDVy#kc%gsxeR#3Dk zuXJ3zc~?gKv}qafDPxn8lak{ThZhzXEzB#$yOqnmeED!zS#egm7cVC+Em>TamE$SO zDqC7oQe0Y&965_T3-Quu=<~92^A;>!m{m}eU!0X+T$<%6EX*n{%PJ`?&Mzp;D;r)` znlrqxaM|KvkUX51Q*Ld<=cJ8EPD{znACWRPCv`+_&WJHNqZW+LO>)JZi+K1vz7K zlT-4?j2JWIQnkPJMYklKwfH@us_s&rJvmf{)kT>^UWi6?wxI9lw%3WE6cR_1O3yYQ}c|B!bP5!04yi`jnEzb3n zd$gqTycOkEZqMR^9EkG@ON;Fnb8AUC#f$lpY>n@gF83?|y(lfd32KF&1$l*8DU(vu z!{1pv~)Xfa>f|@Ys!d`m%fgka_Or%W{kbJj8wBVFLrKC z9=`lJD`Vbt^R7&B?nB$Zty}ERosk@NLHmnj>`k^p(Egp49*?POY6l;C)g|=w{C|KUU9$$JNd4EwN84Fd1e3)7JjKX)8 zG1>LE`_Ds=*>%U)OP|9##8>82 zvhJ)C%+%H{+@3tVQ)!0v%R@}bAMBsP21Pf^_i=kgLbsO*8b7D`2M5;(_aYV z?-pj0=DzO?b8q-!LH+*p>9ZeVto#kv@@#jBD`8nqaY0Ug$urx3l;A2}TJjYu*z`t% zE30&+E8pqL%E~DzIb59M%66_U&MVDbqfHJKHa4XZyu%X#ZMC&I)IKPJy`(GTYFHx(~A3+1}mG z_K0f~##(g@4?dW)qQu+*MQ!Ln-2tWTY`>Qeb6pA*xn8fwYP&KZRud? z3YQrOF4nZA1LfrC;_>!&aI(D}yw`>fUR~x?KxY&jc4lZ*l zA>ncxdI;JHpSHJy&)VC;S8eD3J3$EwSKH7-&`$Wey&W{Rw}aocp@Y|!mn?G@7L{HP z4Bxk*lc4?ZPwnmC-`d;3f3&58{E{4VA6&Aqw)CJx$4i!ocK6Vw-92<~Ll1|St#XE( zHD9vyYC}iroVaA^)9xOk+ug%mZRp|k70aA%5*xDfmfx5jv2Exncz+CRcMtLH?qPTv zdN{nQiQSRdhK_=F$LMzVkkalRQrpnO;T2+c6s%NchrJCQ1@DgZcK6_DcMmh$(8H^% zg3hikS!TDPli*!3uiZT?Xm<~b+t9Up+t34c#ErA8 zOO~QGbP>EGO55GTns)cFz70LRS{(A`@U@|n;9XJC?jE+ZyNB&<=;2^-(3_*O4P6B9 zh+XaOVNbhz*w=<0up`trM|B&z2;LC~+ug&VcK2|kH9fTLdy_*&rNzz_I8VKCPF5q< z+PbfJE?G`C(X3fnyd8sG7BpCY6Kf6oCBP-i`6ik*J6Q8;09@Dvz1uE}PFf`*2T!t3 z+ug%w?e5{L*7VS}<5pu%GAFL=vhxa--k5Ax#oAjt*&3T@*6i_X$DnDl{rRB3Z({8& zm~{WvM6+fG{dU6A;=+|hxP)A~Jg1=4nVYvJC)-Ja^~cQWV{h^=ck!+6^$M>1yrl&Z zC;rS%O)gp4$%N&uHO}08SE+M#VevBAIr-=soxbG#e3*05tT8Tp6al$_#RlAAQvndMqp;>w@;ZbDIU-YQpV&h^#b7#};HP@J;@-zo>K=2@)A zVtGBGWNA^cGplfAL22e|2_DvoUkk#-@9G!0~_{}gw?K9Il0A!*h#dl zcJtfsRXN2ad4&a4=2!FL3-plx_0I@)<=G@nwD?vl9my*H*USl*YLi)b~?&C)pmhz-iC~w+J+fB zwauuX;i#J7aQJ3qMASMoZDS8sUTY=$JV%v7bNC#aGd4LkXgjUvt(&YF2w+%OQ)N&* z>a(q<28|-_J~em*amT5_iNvZ?gDQx7PX*ZXr|8{vTi`Z+`~z$oU+x;{DwlWiK3~_a z0G8!c^p`uKde-By0za7R^y1NPw=RkTjjFLOjk`+R--mPd?qR1Hax^}BcX`Fivy zk4T0sN!oh#^7(rAF88rh*srx12YO(q@p}KMiHA>44p>jqz6cE7tnbt}>pM2GhPaA` zYdy5GRA1`m)E(Mp`;Nhb&X?I8_A>isdnL+_R7YxA>fngl)WNA6Qv(s$K5SEJ>P8}s zk4j{s(R)8l1!b;$SZrTXvBU^2t^J=<6R{WEyQes-##^{ls(wa(iQ$Ku-5 zvtFO|vNvF@_XYyqfX{l-+Yo8@T#KCKX_zXG*|%rTbo=(qC%0?&OsU#{{;}VO#GgIa z1IHyU{``d=)hL@;-4LvIP$cGCIriHW%w0!al>1s#Pxsd-TWB8k?B#a!vbi0$KJGxI zqpC{;wgL6n2aGw`f1F~y9=+O`S$%=^>wU4k;Op7b?W56K?HFSkN;=9k6bH%-l$j{A zP-dgdMVW^(A7uf`B9z4_k9n!zE^5EMddYq>;a`H1jgpJvMtK}1AEf}L2&EXM6lE34 z8kBV?>rp(s->z!EHk<6%2Y)$A1_;lzk}sdB3-) z{r2f2`>lrm0Lnp>S5Xe397Z{U@&-x`%2AZ#C~u;iM0p40J(N?t-=_9&bNh$vO>H09 zn;IY4|2z91?CVw=9lNpiWz032g8|GznjbU|1DJy}pJ@J9V*WRri*$b-8R14echmgZ z5Q*3sa8xnGdIxNg<%rd^Tqc$S5uMyVJLX)cE}WvDaouTM<#lKHoTv3s`!X!U)&sGu zM=z}N&NJiOcb*&P#(N`VTxa4!>-ffwbqB_#xP7r$SIPv=a*TOH-I*ra+R^{DGxzeo zx6pRoo9eER@4;$}=Ngu=FXGIx(K6x!-{wH-Bsc4iZAblb)Y*9U+#sf^uAMoPHv;`{W<3X2^z1pX0q@x|+Gq56QSPYP5IJMhj52&?jmW6V*n&duZ@fP% z$&dGSpr@GofnH*+25djzb2iW?n$wSZJ=o|goe_h2f?dEUuq)ULyantHb^~o-cQ6nc zS4I2R7Z>LX#P#?2Tw+^jd&{vcIPOEfK6frJ5&OZ6KeQb*Zgad2`kce~GZHc8N-^&0 z#2HnNx*_fvgX7(S!NbHj{O$`Yy6+-rjTMZD z7wlvg?BWo-B~!4wTd*g!$EQIrY7g3|J=llZgFm45U^KM{@1XYJUDO`zNA0m2VyQjY zpW1^1sXaKD+Jo`b9vnvP!Qs>%Oeng(9e?`rPR0^r8RuxbU`nQ7Dg*DrDIEF1bn=4^ z@`E$UAH&#e@`Llp4=x}-xR_aaznQUu*>*v9reHq#(N7Wi!BX;rYse3-CqMe}ksque zKe&bb;C4Fa>yP6e%ij7V(PF)Wj&~0nFIpbmwfrt~9?)`1@AA9I-&6Wi`v&{%al#)Z z{pmx4{f;Ez?%!k#`sZZ?`xnd+ew*|!UUr+H8Q_2Yy6K3(|feL((-j9~wk zIl@ox3-WJYc*AeLKYT7Ruc`${`vp_33Z{}D$Jb7NFrECMgZ$u3^5gid z%n^Q?SL9#3@P>ajmT6uMpxB5QN#negMT{qVcGon%9SHV}mfgM6_;&P>{`|hd{-QYH zzfJl}hX(uCBnkfyq<{VRV82gKi=p*YWC(7VBe;EGS`5wmfiiyIj`p;FX@ie{bo()xrkCJ`nyv;2_g~oAlpqVS`~G z4F3>ti0O}${s9&i4|_cPL&2e@e~|PKv9MvV4}kf-Y&h)0;U58xF#Ths zf1HISz@7mANN}X-*QI}og(bqC2!9foWcsH{|8xsWhCLa6NJ&>lnf^J_|Dc79hJ7^r zW56+{{~_sr*uuubJ{JCQ;5gIolKw0Uezn7{q`*HO9B=w_q<^V}O@Ms@{1d^6rhkR> zmsnUT?5XhI3*Kw`S4#hC3!4P{B={$Tli_c`^;koh1*Z<I^egjVHte(Ep99V@{mML;3;SI79|RvX{mML; z2m3tu@l%~E51D>t9?XY*KKu`Z51W2v9xQ-;0sITWg{EJb2a8}|1pg!8Bc@-O2a91} z4F99xqo!Y(2am!282nDq2|vZeNqR(Wq-A66V8jcW7mfyv&kG;44~h?EAjJnmxAFL3 zOu2){hjQuP08)G~rrLOXFs2Lt7U_Qqr1)TD*m!&}?i2p4(!UL)_+Z>`=A z{~3_tgE7a(_@K;IyvmnI>;~^W555~j7zeoCC04Y8g3vE0;7>@}5Ug>`kr1)Sw zYUA<2a0-95^#2f~_+YqfJU$p%!hb;eUjZpT7&$f`AB?5Ke@Oa&3{reB@@zal7|VqJ zu=KwUQhYF$+jx913WdK$`hN;id@xqncziHQg#W1Y9|I{q7%Oc&J{YTo|D^Q44N`nC z*4lV{FrEaZ1!+S?id{E~B#RsFz#^ZyrLG+`}1BwsEMjMY0#%AGH z=K;kB<0%`D55_j(SLXr62jgiQj}OL9;aBGY#RuaV8;=jhv%;^=1BwsEb2c6yjOT@4 zod*;jj2CP?J{T_wzd8>nJ{T|AcziHk7JhXeP<$|cXyft0ct!Ztc|h^O_>qmr2jey2 zSLXr62jj;!9v_U?g3gK@&f* z@T>EH;)C(FjmHP$UEx>f0mTR7XEq)mjM_VReE3%T!uxyn$(7hCZ+0g3pDsA?KEc5c z2*y7sIP77;;g1L=I0X~41e2Evj$S4>woovoL~z1t!PF-NCwT?!8wArf3#M-qoVHWY z@vPvC=LKiJC^+k7!P&0}&V5aA-s^(%ew*V><^?f#4u;5I7hd z3=RQ@fbn2FI20TT4g-gQ_kj0+!@=R;2yg_L049JV!I5AhmXe7n}<|2tEkT1LuJcfe(T6!TI3B;KSeoZ~?dwTnH`#7lDs}kARE8#o(jhqu^uU zW1tgsg0Z^YwG!d%9KBu3q(tOS=A{>wNQibD-|vb)a4F{;lJ z{=g%H^clP^bPax8M|!|ueWquKK8xF-Z}2{17^|T^z~j^hc#`@6-;390duaU7K7z)i zYGyuT^kT~~t_hw1ZpU1%kIOr$Z~qtEvY*B5X*G!xzj z#($am06(QZz|W`;@T(E}B3>7IO3;{8oi0IN%Oiz-&#FMONa~rgmm{EGJXSDA2jL{$GHs~)gWA%K`IK9A=q8D)+G?Ja*mZ(e-j}gB}x;rdN2T=$kz0 z`W9}3CKEGN-|CsBZ}&{scW@hYnHYy&>B-QmJTvrN+y-qX=01J5XQsZ#bHBcq+n~?H z%+mLH9?&V1PRN<}-Hryue>q#yQ>eOFg& zKgP~}iGPXs0+5*O>%uwOcMIp(`T7xFcTaC?KXaa%dWgCFu>J&@Gxuvz?(=0E8s3-zO(Mf!1WKRDFd&o0Jfw~H~Q`Nf#h{bEeh9?{?A zbq^&4kBKxEb4-i%lf2H6@#s&C$zc~`n&B5?n&}r~n)Rsu4zF9Ii!qTF^O*jg$Ely< z_Tw31Kiy4I3dRJ^rTqlXqx}TV|K`e-ZgKjj8tvDUbArc3+KoA`{=fKzHUNd{y|+;8 zFSK7k`wLt|`wLu5`wM(*AnFc6sYHE0j^kjw9)c2&G8APP$~`FLuj`t|_Z`}0(!K+i z(7pq+Y2Sgl!%=sHtbe|DTE7Im9*L5Ol7y0sG75$IyVw`~Vc(&hoAw>}IPE(ypY|PC zFdB8o$oiM#()x|X>v1S4DC1EkpiD%e{yrU=)-M&W??st}G8u)=IbxZ+K6Z+gQJv3{ z()z_=x&L%4uDjgz1MjoqmK*N+!4Ft*o#n2Nf6&SXfx{lQ;=0ORKl~BV?yK=>{f1yU z!D+?yl)FAL%gTm=$xE%cesb53US?(YfMW}-xL$JCr<90xSM{`hBd|PSwH4P%?)uax ztZXDW$!o>Xn8#=RrT4e?qo3G+p+ytZB-16QNjfBFNY0d;B{^GiuH-z)`H~AH7fCLb ze2n-jvH!l#NbC1ooU47K>)pPYGNs#Z>0Ip>f6e!8@KMwCJP694-1dF;9zrlek=q zK1p1mMZZH_sYSm>T&qQ&B0i}_`-x>*^l9QoExL~QlooxK__P*%p7@Lw{UPx=E&3wy z1ugnx;!9feW#SLD=z8LhwCGQXKh~muN&G7<`ZMBBwdgO1C$#9Vh;M7re?$D47R_}B z=#(^{f44BLUu*TsOkAgALdS4{E>q`-lzJER$|bNbfj_vH z>;>=_f`z7E(W#1HFM@vsxWe>LmFtIISqytI{3T$C=~r~BQrJu3UkR=>{fbVt3ieg- zuLf6}enqER1N$2I*Me(Je~w%~^vZRxuY>;y@Cnne=v3=rUl0G2;FG3b(WyMJd*Jti zUih2SD}C_$P!Dg`D`jRskvHZ+Iqc=|ZvZ!#eq|n1z+M6WMsTC)SLVSc*f+ty8Qg68 zm3goQ_AT%~1wLi^2i=$lTVdY{|2A-&=~w2#cG$PW|1|it=~w2#4%m0VzZ2YP`jvT5 z340~{&w$UE{;4@IMbeZ~B#aum|=% z@V@}QVES`z%!9qK?}h(G@I}+F%!7Tf?}Ps(@Fml)%!B=~?}z_o@MZX0s#j8cFy6QE z_+We>bPiRgqWEB(vGMp|oD+Ukr=s{^T(I%@V0=l@xk~~_*I>X;)C(mHXa|0UkksgQ&D^{ z3>%LR#x>zrbt;Mv#&2vqJ{W&1{Hjhx@xl1k#^ZzWcfzmgR1_bK-`jY6FuoIhRi~o( zVEn!!msL76d#O#w(H3r|_%ufZ~G@BjST`xA3d;fZ~G@C*p%K zK={>pK=HvCB;tcHMEKQtK=HvCD&m82kMOJWfZ~HOLc|ASr0}crfZ~IZB;tcHO8C`z zK=HvCBjSTGPWaV%K=HvCFXDqSQTWw)K=Hx2SHuTn@|~9T^16<`@2$qUMWSBbldhrY zJ6}`B%=|#;dP~j;W`Fc%XR13o19j8+HCQE^##!Z?$+^oni*q;WpSeF5?cAS;b}4hp zd(yRw`F$;Ye{3#O{l|YL>gRtc>ZdNmdLH8a?O_jd?)5#&xzFe1+)wQael6M+U3;^0 z4aR5JPxEb|@1MzE{I|ki`gek>z7t&YkD`8h#_67Ru|3sxu{{U;VtWqy#rC{P{jB>} z(Qf^}i*}AVr-QbKzRy;-#}g3secD@mTr(G9Jz{$f*~Ru8_KWQ~;uqWV2Cb*O^IKd$ zuITnwXFs+F-|I(_eN&XMZ@E=)>+OQu?-bl|w_xP}(GK7LN6~S@wN(urC-5j8C-68O zC-6<`w`z!Jx9c9!4tWB6eY-~r`<_vPd&ddxn<%({GKgyk8Y6NEqUczm&y#elz<20a zf$!0=0#7li3tvY4Og$~i)Q7+Q@=~O!50fuDBQ1)K2io}Qcz~zrcz|_uJixQ0BU4@X zoU|y?lQW?!W$(Wtj&crY?pM0R`-E~2a#1H2b(VrlK{x0I^T0fi>hH=!eW76{l6HA#6WZqEsQ)wA6bj_Aa@J4XB={ug z0X?7>^n%pxcpm02)`$JqY>t@^^?j&c2A0WoCs$ab%CTII;X<*~4< zdbvwK?OCGNdB{$=dp>B;m?zEZ~A=pXXB zW*#8v<+=Jr&rI28NS-sq?U*>fwS5O_3s+Z^K^}G({5Bi`%V;h^*%OBT2 z<#o;6Lek5Z>A&>k>z{EuQ*sf;v^5yy$ye{P*s$)|1@&f%UUWalLsXuIEvwC@< z{x_aP93#@pi}b6W6?%iGSpS;aC|9vW|Hf0QH+oj;zvVW{S6rq4&a+zod(RsEdv2qg z#kKlBc-HCv7jC1x#r68Xd7jk&kH@3`2e(n~qF4Wu$EP!InQnz{_0}hpzgVtE zcsJ;sycK#EZlfH=jruL#O?r3lX1yo3Q6A$Ky_feX-R9k@_u)3mW!$F!z`I?K_CBrO z!EKb!xI@3oyHoGyt<+<=jdB{F(ffO=^nu=I^}$|MFSlb1s$RZJkM};O5A%{8dO3C- zKZj{nFW;>X=XIf%2Yr7*d8y{{d0tQ8b)c8WgwxCS=!v{8^m6{43hCu9=*ix_`e<&4 zULF%pFMm-V%j-ff4;quIm+#Y4cpd2FG2!&`m-Gp|F7$FfCeq9I>#5$C^-0_gy*!5Y z)7@e8a@YHxYP4UWmj{hY)ytPaBhEshdeF=H{zAXa>gCy}n}aeN^+_+!#p|UgZj?Nf z$5ED{kRN(E-*@P*S-m_Tb(hQf(92^A@VXGC2xSFIF-i#v^#{G2?>qF@tX^JrkFRq5hzk$E?TeCs8~oUKBbX%Y?p~`GJ*D9q8pTE-WuOXN4~4 zuFw9+%Cf-RpIccr=>EjYa=^!bWo5Zw{+FU1^zxXcST6XrmAS#9YgU#A7XPgk`kuSK z^zW=}8Mx{@E6WGh{G(_Gy*y?)me>8Ol@);N|J}+8K~KPnq218SeZLTTdAVeTTW{CcDscOq8e0wSxLaduz{&v{TMJeV(V!E$ z>v!Fwu_wUYBQ+$vyX*Ii(%6&W-fN{xH=~{T%CQfnKiW8P}kl{e5U{*cMYSzggZf)la|B zGQC{QPo{Q`E5RCIs9vt-FH`-QpSMgeSM!|xXgB+Ppyp?+S`!~_cFSJ50zasSVaQWAlF~j=SF?I~Xs$Q4+w+Iu_ll~QhvtG){e6EF>$#a;9-1>wbF$hH?-kO^!{v}u z{e$14zR<;+)yqS3&8gj?KLo#5RJ}Yj7oF-K`6Jd7MlTP|S*LkeV_2hFo4Zc!j(;0o zFAvRer*7R3$~(_QKa_J$@t<ObaBc%r!i5^IrT%@I_06O`Q_oXa5c|d9h0i9tNG>ObaBc%r!i5^IgN?5 zb;?6m^UK3&;cA|_Iwn&2Ho^|D4uGIp$<d zy7bNT@80rJ*2aT=Ak#2Umi~XZZ6MU-5ynEZ!W() zT%LGn{yEKE$}bO>hu&O%Ijt||CVAqa`RBAg$}bO>hyItEUw$#N>HP9=dO78t$70`7 z&N;<#(#t6iUCl2Kr1vCdxUdF_B(QdFX0>c{sgX%`;cWr0V4z`QE)D%uI87A)63O7b9GFrULKTR z9!4*xymJ~8&pF4KNH3>6bkfV!{PJ*md1#)wIyO}=56v$Rr2ZbKtJiYe{{$Fqi10r zblg9>_fe#`y@(Gsks{sh8jE z{?k;S^2=MImp7GXPS-Te>E%u3my;G7PA^yU&PfBLJaf7>H1+bE-T#{EQ+|0%^zx?i z%+>7))ytd8FK>xnuI8WnF+R#KC;hrL_uZy;lwaNwyXBwhs#56QGR(Cy*xDUoaR2yGsoN_y*wWGq`29=#Hl{zmxt5K zo69p-=a#CMH=AD`MlTP|JEytN^USe5q?adSdv10wbE;4I<>B=5=JL$d?NRme=JLzK z>E)sM=QMXIzdRM&(VBaxQ#;Bp52u%h=ATnL$}dmH`dV|(b$Z|Y|DIoNVIsdgHQ?}h z@dcX$&&w_2&&G|Yy<(}TExmpZcOKs><-MP)0%I0)RQ-5qucWmx{ z7B{<(MQiV0(c1e~{BO8lMN{{wXzBeaI_^t>`z}BC-*X>ldIlw(OH|^wqr_eKT_|x6 z&cliO@cT{rY_7)dG5KdY&VTu5HO^~^NAP<{KUjm`HxiHIe3Ez)=ZD1iaK3jL^y6HP zScmg8;(45t5ijC=3y&K5IjL#)kTA4fUBC>a#S|XK1L;&M=>kLBC6)-y6~I ziOlCy(C>oi_dfJ{9`pGS=I?UM-`kkKr!jvg)7`^YA>;NO8Mhb8xUI{$jl9bm#M?u2WZYgTkG1l6xa9H3lgDF)JRWQ1@o>rGktdJG3VA%%rbWefuk-ri@m$X7b+}CR)duR$ zbo*1l`#?ifo%b5sSnt5I8?UiV^$q7V)^P4jUISiF;kJ$S4RaWqa`vNTcs6}r{D;p! z8UNuqJZ{2Q+i>BG0qQs~uHS91fqOuA5s6II4WOo_?A6hdBK2=S9LtEcd@Z zsDVXZ`L+ihJT>L4*TK>*H(D%=*4;SuvX>Lud7~xH@*VVwYm>Kh9#x69w{&+m;u(9@ zcy36eu0_-HQX1v+QX1v+QX1v+QX1v+QW_r;&r4~P&r4~<^HQpD|G4TY8v7x5U^?z2 z2OhjnW3Pa(K0vn&WS#>j?A}*Sq);zsQTOnP+1}Sr z%=I2RX7~R1Sep0nv2^e2$EJB%uk}|Wt0R5HPKP3W^qiJP`J9$U`J9$U`J9$U`J9$U z`J9%c4CS5By1;3 zH1;<5&T5Ul1HSi!#@+=_c}3lKP850HJyGmEc`VcW_OT`2caCLy-#wP=wbQt3B9XU; zai5Iz(eq&%4I{h0o_C9!ev&K$? zb=x%d0eE(&7|ZDsKJN!7%Dv}dJO8Z4E`T3Cudxroi!W;IBk<#wMcofiRC+%;QRO{< z%;&vutlazIu?p`;$2NI=G;V*SkJ#yaq>r9U(bru&bFL7qM3Y;Y?i;4=1OHm_hnQLiY!BY5hoY^@` zRxWks733B=a|?@|uKavwVTrS-xG*;_Kc{45NpaT5{QOnRN5JxUPF5*jh|NkFmza{2 zotvCAK5KMxc2@GZtT9W*W~YqJN*t3sW=Yn#?8KzpamnL`ULW?Ky|8dgXLfmXu&cYY z?+dZtF6|NT1lR8`?G-bE?IWZ;`}JUZinK>O8El^-?FaFtq53BlJG0SF)yf#235~zb zEE^BsQ2+3(68v>$yYLVh^^YH9*y|3eR)%|S(qCuhKN@VuGjO=Q`>A02x3p;O!lFM8 zw&U3n%%*i_cxYD8-@91qjah0JJBry&Z$d#~X%0)sURzMIb~#H}TCg&~?J99I`Je2i z#Vnz?Fxyq?VhN==Yf5=-*Ydn9m~--r3o(_lvoVoaLRR7Oyt$w^a^OzW7b=F6n1N#mt8DLKWoCXStIS`*U})6EKr>83Sl ztUkuHP8nx5OPV$Uw*G$IBil*Nj zze(CWir%K*jH4#aI4Zm0+hD#X4nd*qGTUE&AM__LV|M=k#=p?zk#JFaGtOUse+>|A z`fL-t{+}u&)PhI?d1oJum9E9Usk`STcT=GfKtmutk5 cx&5ZkT(7z8#*3!f+kYx*{DWChGPM1F0t1vZ`2YX_ literal 0 HcmV?d00001 diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_causal_group.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_causal_group.co new file mode 100755 index 0000000000000000000000000000000000000000..018f398ff816064540576e859e3a3e6748030e60 GIT binary patch literal 46640 zcmeI53wTsTn)j=_6E1QIv^oYgm~clSBq2b62z^3;4sr`10-~KJw~h%(NWxX@?u2j= z5Mo3`M9d%}gRGMgP`nIjG29d!5}g@uyTb{N6K7{u=iAxcnQ!--{{C-O9jNX}Xp-#i z^X)gu^HhF)>aD6%r%v_T=luKaQ=X|arkyqFVCc}c7r z{`Y1*Sr=qeU7Ilf6@9WXgWY5~$~N{XvjX59AT|xMK4P2dp$m)+6x(ccAS<_tZG&n( z>g$%*g?-D%fh-oAY>jd7a(`yz-%LHDzHxl>eqEMD$9J>)Gj^MfdbGc`{?Wepcv6Sy zFNlfvh-`z#*mp&C6l~F;HY;`d+y@z}c-^@?%T?-(&Cf0=%q}Q>diyJ}&XU~HuUO%x z*JGWTWhZuRm$6r3OY_!ccY7^%m9t=Fwj)2UFzfF`QOB~CIXT%S8%33ui=A0UrEV=$ zI=pPHgEwKcRJOJ_`{lgR3Bg_b5$|GAVZquP_AuSpS)Uem7P5ztEmZo??%@MQ52IVy zS;!s^YoXGAb`KXCJ&bE%XCZr-)Iz13?%_>N#FuBzpK5f{*24ZmcGBKLrB*vBr}ClF zOlN_!#Mtj#(L|Urap^4(stNVou%LtW7j>}y(stB;wKRK$qaeG`7=w&jb+G=L4%T1aj`{}{Ts!=W-geYikN;w2M;qAE z(FX9T*ycHKzJKma+R;Y9RH*N01Lr&1z{Pena4_GY zxP;5?Xdz$_KJ91&pLMi>uiDW720?KOSK84+z#x3x(FU43+Q9GH(ZH+AOYTHe}^3zdk*p+tE_scns=r3&T3x!iaXX zaAZ{r!x7hxh60BpvBNDSb-0D(cC>J0g&2;)mCEd}wWFcH;YjUp3-%7TFtZ&k99k7{ zb#=)!w;hcH4#k2Fx3H+gEi7$E3kO#zS6G)!8SQ8ya3Hce+=8paEfln)1q{UXtE@|= z;&wCXSYqhh+*8sS&33#_%7LBw?L=K!}pLV!~ z&pO<~S8Zvbedn##m}Cyz*=6Mw=3bv{SH#}iI@y|AsMhNF>p-Vzvi)_Zzi(mhH!OLjcABzpzETMpRSGuWHS z^jd6bZgGhtvuI^uS;nierE8ZjD=PSWbX#{Ho1ca6;I6fVgZy#qYUir#oRT687#+9G z_$GW+c1dYoQDL?5HNE&Uy@mf&z-qy_>PB3GERz8CMI?bu#x>YvK9x7pos@^T1Dlzd zDX-^{dw!F5ao-CTyrx8X^k;mvFShW%tq0!b3K*$tI}3W@4eX6i#!Up3;+vE4f3l?~ ztHi+=aS|<+`|r#%`AwVre(W*4i;wTu+(jIMrHe0`GQzs};)q?k_-ugJWcJMkZQaej z43OPo_7wv@&|in3K`+^RVSi)?YiRq{F1}jO3;JlA_D8&}i|-2MgWrQ&nK^o^IV{>A z7B<9>^Wis}1sUVq#JWV+;kB7noU*VDr@~m}DNFb*ZjWz!-C%#3*Mlb0HrsdF%g=l5 zVU^yy%kSP~V&1x0{#krH=lJJvru)-5=lkb#S}GS*=iJVG3sCtUTqJvd+nRkHrY3X?KGb^Z!)LB zN#b}~N`w9OayouHjz7#ERyS)vW!>OmXoGD%HFz{}_o*Qxi91dWi6d5@8eB=-d&4e7n29yWHN%w|n_}$?a$Oc3*#Ax%~p)?&t5vw~hXF?OE^fc0FI- zG`${O^Va$6&-D0<-!rEvqTX|rZEUpHTi8{$sj=ytg*BZ!lh=gTW}dgPu_*#QI{Q(6 zdHsdFt{*=4MAr|`EvGh`E}Sv_`rJpR^7?Z;pU!U+wk_bV!)LpRUUu@7m%V+;KXO7f z28{XRCUTC&c$kN$Rb$M(_HNz0-tOHi#$s&Di8AlD+q~Y}Z?7@;5QKQ-~l$;p26 zX&M9nkj>goZL_vxBWsGOY`WSjtUTG9yg7MC*k;>~A%oAC+w8V-+h$u8(vD<%a(VKQ zu)5?S$s3dXVK^=vOLFo?B8>&5@EUvdMphqFS%0S2OYF+f@7aru-=D#o2e$9ozQ*sL z#w+&IQ~k|nJzdQ8p8hxoSEru!c+HnResiP8@Avq<=8K-DaGU#T_#}7JRB^7pJ#(hV zw`V@NBkaB@)f>=0#(U_nv*&u@e8dbpf1y_m(q>ja6#E?*`!b{~ zq#Ps{(ql*kNQFqnNF_*RNUM<6Agx1MkL2d9Ha?W!((zG#OZ}t#|H}4_i;iCDqAsl|xZ?9(fY=Lieg;(ISfws%Vwm+(Y%= zv@@~x44?C~KdN7jZRGWW59`$j`@HMSIM-e0#<}p`22y2K~8;ZHz^X(Kp(LU*N~=PoCsr z190ppUx6|k&z>7B_QU%>`0AN+dE)(iX6V&3XYxj({mrcRkjmb@2Q}e6TTWv}pA!}K z>J8yDHq9u<=hw)z>a;CL^!~>Cvx@9^U;BHDx$o~I=BnTN13qW{w?%ULP_7qCZ0HIf z)En#uMu6SHKHx22U$6&g1$%=2@R({EUvEr|*B>*$>vf7_q2sN-Li*woU zO>o&qB*Iq>tfuqeA2^WvD*wPiUY3f92frF~&2L$f$Zy%;x5WD?aJxGT+`c7fyGPJ= zPi1>m#CG`itw_K7!i?s-37Vq?!-ffVu?cpw3*M3;*wZE0o9g4!pbymttyCYpjp~Cx zp!#4W)d%mS`rzGEAM8){F$~dE9~?mS!9i3X976TMVN@Tym+FHfs6H55eC;^?^5tEO z#YHpDM4MnzhF~%S@5Ut~*}+t@gLblmGszys*j%!M3&;*GB0IR0nR&Yz(SlhvL05)g z0ol<`G18R-9nq@;3n9#ei+_tV#ogSK6LPxIsGhIVMXpkJW9I7ZlMo|C<7c%Xev zys*=}C;R&Gfp)JZ?DRe$du3XneM`Eq)BA$#+ZSKA8}ARVQ_QOx!9<^6(iOpEvg7>P z$PT8G9ki1joJn?^-??N57myuXM0RkgH&e{38o?}|pzDfY0ol<`G11q6T6$EjCn=dNqx;x#=N5KJ)x*lAvoeL-5FeNnow)4U@4(#6;9bFoeHY9RSW_(W z7pFwgydNa}_Z_HD<4gUb{ze9E_Y2y-Q`w$4eth{PTUcFR^Ts;cr1RxN245&o^QUd{ zpefv6lsCz1?^O2w5dyKRXG_hgG9|rqyaJXR~EbT*0>|W&G z3;TWGeTF?=+DDt%2;`4|eIz*2u#b`UaV8dv{8-pWfujt&Chb#9EDrf`u*ZY(hJC8E zPd71WQVj{PL#k>RZP?SLeZGk$B0mxKG2j@({-Cr!WMX5HKNj|J;5ftXl=e&$ekH^j zl3*VXjyLSt(w=K#6Ocav_KDy`!@fe=OHC{p`N^=~58iLsS4#V86PtwmNw7}_C&S)^ z`?02S6D}PB=Yb7&8_H>*X0)TsgB0Yaz&-_>V%U{=kc#|N*r$S14ZAWArXhbC?9;*N zhFzHlcI4Y(PXp5oyD|@EAb$qz4}cFCc4Z#m1G!-)?6bgGhFzHlvynd=_Br4j!>-JO zxyYXjdpekI*p+!O5Bc+8pAXJA?8-cVR@1NmcKkG`;X%W$%!7rXIs zbwl7UXkOTxEPP&gp?Q#hC0AM}TXeXq2?0Fr;uAF*=(pgV-UM%sS}l7G;hR_-74OkqDD?JtAm zAM|W1_YZomupgH8AA#f_^gJu~4|=|^ACdOgK=Kdzax3=_dXcc#O8ZYh@(=n7EB6n2 zsj$Bx?Z-g!5Bf?g_YeANVLvJDZ-e9?^tD#*AN0qC{T*q47bO3nKVjwmLHFDl;2+d^ zK>k55w{riWZxHRM^ML$=zR}A4gT7hV)pk60+RFWd{*17z^ML$={;ZYz2mLu=SLXrw2mN^~_Ye9D!miE(@(=opR_-74mxNuN z2jn00A6mJ8&|emIbsms^&|k4~|DeAr?CLxq|DgZK%Kd}>ny{<$fc%60cUJBn^dAem zIuFP{=s&S?|DYcec6A<*f6z}@xqr~#5_WYSkbltMwsQZVzbowOJRtv||J2I;gI;$h z_YdD%zVQ5>ebNv;<;||d0n-HsJs>z_j^MEQg7-coIO1W!SchO-reH#@U}C=D*doEC zQo#wU1(P2aoa7O-Z4gY^ESS1YaN15m`!j+wo)et;g5d0z1n0giIPX=#1+NJ%{ITGo zV}eWG5?uPO;G=b**gTW!-5O63s6dVQ)1BZjd!F$1b!TZ4bz!BgGa3nYq zj0I!CQQ#;r4vYii!FVtMOaMoNqrpTl5gY@K0mp)4!ExX?FbPZo$Aja+3E%{9A~+FD z29v@2!TZ5U;3RM|I2p8oHc$gKFa=Bjr+`zyR4^5s3Qh&5fz!b0;B?Rq+QBq14V(ea z03QG!0B3?T!CBxea5gv_oCD4Q=Yn&=bTA#92hIcMgY&@!-~#YL@Ii1PxDb2@d;Wi@_z}67XU0VQ?w96nq4H1bh^H6m)%PO+5-<#d*ESe4?H?po59OM*WmYcqz4SqX1a%Jvw1%B4cv_O z=N}wy?r#%)ve`tRQhcIMsXo!CX%A~}^0J5H1N%f8i_xd0+DTsK=yKF#on zKF#!rKFxkadxw{;)kL32i+NOg&+X7o@%-ayVw~$-<^rF zORmPGQc>@UmePMDwkNDM<37pNnEbezjRGfm%;*|(d(FS_{MPoyC$^tkB8e%Isglzq z?UFMjXG+eNoGUp`a)IPR$wiV&B$rA)O8li5zpv9$`u`T!YTsyDk8h?->G4~-R{Qzi z^RaDCPwD?p4GorWEFXK~(0|(oGm8gPHk)z(=4wpcW=1HIt8v;+Gb25YKKl}}ZGXnh z#(*=PGvhwZ)j0D7GaCoae#wmcGFRi=m(6TEIPX=|0~fqzW)s1MKQ`k7-qpD1n3>%V zE_ut0`z%-E(s#{lGWcknnH{kG4HS1ggA5`Scg`~~sv zERmlPe`1OJf_TCb`4#bPOXNQge`<;3x&w4dn$N#moYKFodSwRgQ!=1qI6=u|n#&w)J`%r)$aPUS+r3-&xP&#(`c`-fin z81f&3Js-?B?21lRfcyg35j@hc+^{P;RUz^VVJ`xU47;LJ6(heG_7&g?!#-8+A9`g8 z@=IVZ1xpRPqEnS2zYO-3;7Y@;=v1qazY6x%;A+FJ=u~TvzXtZT;9A3;E%y(-avk#5 z!Tvb-xM5dxs`bcU5Bn3~6NX*UsocnS!|nk+u(zgHdSUmX9Nw%AqEXt!Tv1xtYKH?!EWU5hW$D4Im52ZgFVRK1N-yf^M*b9`aIZ+{JpTh0KQ<@m3goa z`TJmh5q#0GEAwDK^7q6368IAAH>+2Yf6(8za{r)zAao8@ry~ELpRscPpq~?VRi`5V zpkJ_Z|Db;)?5a*h{z1QF<^DncnXs!m75N9f!OH!E{)w=wIu-c`{pVKhAM{@eyQ))> zf6za-a{r)zDeS6FMgBqmdn@-3`mcmt)v3ro=(?5r2mPwBt2!0=2mRMp?jQ8u2)n9N zk$=#?wQ~QU|D&+0Iu-c`{r6VxAN22pUDc_`Kj?q3a{r+Jv#_f=75NAKU##3e=>I0{ zs!m1zLI1Or`v?8sg$kBgn!U2;(IN19*}>~!-ap)y9&EH z56D00-GzV9dkDKa56D00y@Y?zBZOU@2jn00zQRA~w+g#D56D00w+sKE-y!VkJRtv| z_Y?j>zf0KFc|iU_j}rbtzem{Bc|iU_j}iVsA1LhVJRtv|4;KDGA1dtXJRtv|4;TJH zzfaiJc|iU_A1VBUK1$ftc|iU_j~D(yA1&;jcu!;;V0VtwsNA+G&0)^!+p0OMWBlW&bF+>N~+T z|18R6US3y6UTGFCywW!PaMx7YG>WQiF)h*UDUIupAI-4`aWAd9=BhV_gdcK z{hGNL`w_=;*d~tWh)*2PQJ*-T*J(c$UEkvRab=IUy83WD_+CGP@;60@{4KW%ZoNZr z`(1)N?h&jSDC*(+{|GuyxVNgM^8~&@=LtMc=Lvk1+N~Zc>g~Eu)I*#A-{0<0B7e_l z!M)=I_e~VsKN-Y51oaWI1QB$u(B?@xSKvEzuE6)`T!E*U)P*mje1?`1Vd%r(et9X} z(1*#Em6j4g=L2&Mc{iyjm21na;s2oHMm-qKe`tCLF^XBtJRpWH7LIZ z<=291L7IoPPIJUMY_G%i4X1o#B#2Hl_s^ng_Fcpm02_J{FnHO9<~@?MlL z2g_x>lPk;-71*x8_6BeRSP52w8^Mj>CU6tD8Qcug{@ztHEmU8Soi! z7q|<27JL?@dgoW9MC``)ZfrjXJ_qgr_khoX&x3oxz2FPr3n0bT?8EDQc>N;yBDf#i z555Gx1ZHSHx2l!X^(yV};@XsmtiFMIc{K7>z1*ptb}!TF-IPzUdtPYJm?y33<(b-9 zUY6qe4DBpfFVE7>^D-3cN9|!Aw5pe9YajBmMjRmN1mY6IuCRlVG$ zUFKyeR#5Ges+Z?!jl2xS4|<_N;}~1j%OBG|IIdRp z^5xnWye!2Zs(n)R@svq|ge*{t>E zc@)REMeE~vQnPxtYPaz`ie=oU{lK$bi}XCD-O2MPo^gkEw`Zr;-&3VU^E`@ad|Dge zsn!O0p3#PQRK47WKB#*6E^V0SS?yjAb->HyZ{=Amp*{db;eCXv-q4e?>w6VM_^zwi{se1W7Es2+b zULF-nFMm;+z{^4}=Y1l*e7~0Lc}bhZ^P!hV(Ky``LN9l||EYz>6?%CLBo7i@k7Yn#&G^8~s0{S-C?~d;oijrhbTwvuWM-LQ&dceU6Q*c_ek!Q+$Xu8 z2)&%f26}l^OL{rkpqED(dU-MW@3mOi3b3N91^S|^v9gDSm4cfh5Pu}~@-l31xz)l} zf?MyfuvOspyDV%qxZ@rRTLV@Nw6L{c^-v3RLRaIi`z-8naQ7$+0^VJXdq!K>6X4!) z76e?l8uv}KFb}wYvgL1HFQ@YYy*#R|dU-VNlRM*kJL7w8mV=!r%hmW~s%LKq)BuC^ay5RL%Fq1# z=Jawk&e?~0bKe*Gbz9<{sosJMH>a1YanV$7QG?ju&BjMlou!|*Q7?c027390Ht6LI zLN5;$e|;H!EciUMULG8${a1RqF5V}kmxqesuE9QceT(wYk)d_9s+R}HdQ-hUe+Yc9 zsCs#DEI5_l_h+%68|mf2G2=8RYjp8mA-z0Q3^|oQ_$|r{UA$GjJUG^z>K*<=;Cn^Y z%Y$Rlsr=DDV?QDE^5B?tnuoQzIijtx>s0Ufx1sg&;23wRck&PBh_=SMQ@!{899l0A zj)}jFdOkfRqOGy0C=N*AZN^9-~@G3Vs}DHfgL(AD_zP+GVeXKrYc!P>eSUmi*qr?_+K6UCfUJEW~s z9J(4`9!d*WJd_r$#+j>qQnmHY_;RV2 zhtS0-?%a#>!DG&GKIl0P6o;J!DB(;Ojfo#N2d z`0`L%xEg1!_DR*&1LDg==;9Q2PJQAr=jan@>lB9`9A8djbt4@-IL=({o2st|$CuNX z-AE5N;?JwlM~XKOjzbTQFQ@UlksfZupHq7jZyp?n-Wgvmbo9>n^3M43&iHaX&jHWF z=!`G#j4!8o&>3Ie8DHKRU)~vCPQO3tj4%H$iZ2hLfva)nHMoZ7apt&hp_p`vQ&!{9 zX}hKP@|)4WTZ%KMdz#kb(p!ozr}*Ulh!MO|yto>7UW?gApZjn;6kksLzEK=H*;|S)r}KpUwHhl< z_0;%t+8@Q3Q+{i4=qxC{oX!#U;cG31oZ`<(N27RidQOQNhpxt#(>Xvpt;UQ~{5iEl z@#Uf7&~GZfJcRxo9CuFd6&`1fxkWmAa2$GT@#UmT-$?&%EzVq>TdL09YJ7PJ{X01B zoZc%u&K$=>I(u*&dTa6Jq4e+8;>^|KQFZp#;>$zDi3i7@)7+)_@=$T;t;Ltq{-SOW zCmtMsPWz+y@=$T;f2;B37sFeQFAt@cQ`~tp#+G8v$&Zs>PI2gJe0eCnT#YlQdDqgj zht>GE&vix!NaHFYk;mzd5~};?8Y2A3Ww9=Y#ZeibGf9%R}kqYMi-xK2*J2jV}+S zms8w1^@(E6X^xOyPI2gJe0eCnT#YkV`=sjS0rBM_^m2+jr#|tRbM%Sya*9JIyTX8bjFtpouEm14o58Xlg{TycRoLQHs(R+^P@YTA5HP|>T@{AKXg7ny7T$b zhT2Yq%^zsB8&yAjCPUR`SJd|GETAaChJgQ#aT6}pZy*xPnoaQdYmnY*m+VTu_sz>qV zq4e_L_;ad9@#U%5Ut6BJPVbxl-{Z?oOvINb`|Vy2zF?#EY0LRKxnXq;rpmgqYtL}! z{=G^*`?*>^`+1jq_VaG3FYb}ga^5SS<-AWm%Xz=l3v1+anh(h5G#`}DX+9*iz{B#n z%SYsMmygQlF2Bx%wpS~ksr-g~rt)$5OyxJFW_MCP8~GjiY~=Ukvyo3p?ae2jb$nVr z>$qM%>-enH($33g7k?<9U3^hKyZB?{nKblW0BQWB;Tz9{AuXOXcGA#|XR?qMP8v69 z*y^)U)Muio&q7h3fucV9M1AIo`m7W687JzqP1I+asLwJ{pJ8G=r-bxj(tSzSHJ%ef z`Yh?Lq^law=`i$BL-#avO+&A2O{X;U$JTVm)}Ci^qvu$(_4yTTeQw47gy&VX^qh*D zeLh9!b1CrLKc^J1wC`D2IEqN-zXC4nvh7>dPMq-U@b_$`6JRb z7Ll%k|&ep^Mqol@82P}ky6*Wggs-cZ-vP}kZ} z*Vs_k)=<~fP}kB>*U(Vc&M?-;px-6Y?~Um9M8^6Q^t&MXy$}7K$53p zznw1q_G0O`HR-nzcUcR6dpKSC?Zwh>YtnD0Nxz*g{q|z%w>9av5g*9?cDnT2i1X|G z9OW^Q9|wGnI_3GuljmcFJRfW2`Ebhfktff`3VA-(%Jbor=Oa&^j}`KKtd-}(DbGiq zJRd9M`BF01k2Yee&R^(7YuxW-^+QM3pXo*G+lPM3@+ z*IwVl73Q~BdvQPg67vnU``*tBhmBa_dw*~f3vc+g7Z#qHa@J#KDVLi~CPw>io_g8C z3GKYu6l3}hdd1brTe^;}Lfu<>x|*@ZUJcd_Y1S-}v|dWHTrZ_ru9wm**Gp-Z>!mb5 zDAr49mg}W7W4)9bJU^~xiiQ0UJTM*4kpmAtU|}zVhvv{D1DX5437hBT6Dgj<$U8jW z!hQrEdC0&2oL1X1P90vs@phS*{P$EZ2u= zmiHCSa($R)tPkVEK7AW3?0xX`W(zwF)^D@055TiKMPE*z@OnNtQQFea*r? z0e|sh(U-;(HJ(pS9PoUGyw8qV*yrFEZ&}zE;8*Wj*q7ix)QPfRoT&ADdEyPvXUA$h zpC3Em`Qq3?&zHvzd3@CG%NS?s_h;eWC~ud#E^K3+olU#^-3J!-9et1VtAQrgr})a1 z@49$}UoOXWG-F-pAO4bJ1bYauNXc-?dTGB!Zez^~{`W6`@fXnxue67WY<5s?%gt+; znOB&fT{0nBo0S?pb7pFE{P@_o*tqCnqY8=&bF)hv#m+KUp>uh*qqN9T=5m%fN{W}4 zIx?Mwj?$II#YH7$D3O`(%*`vzg+1GmmA!0bt|PB7r^u00RN`4ZPn1nIQ zGRI}b#pjGm7&rV{xBu#eiRW}>iCCmt{p%_7vB*=v-yJf4{i%R#TI(2pU0K;Xf%zk4 z{vT%q<|oPgPhShnpCa?io(RmxcZm4w%5Iwzn2$A~dHz91U_RC=;rX9pQ4aNwpJcQX zwnNQjc;+VkVH{ow%*PryJU{MOVE(_*rnwv2@^N53)|OyaOILiCEH<}rWvt6t>SFRg<(HMP*pi|wXPJ}5mSwLgd0 zT~Jblsg#w4iNsrZ5@ZF=W!VLexVWU$>o4Qv%Y=l~X~xU= z)N2LCOfz1_PaQo*7Kl$6Z{)^}i!*ZLQWC}+1>&X|x$$GC#2dL&uI)G>E^Z2QESWvV znOBxwGJ0&za+lMQvpUP+%8DPKuqHks$+0Xaek|TJD@&aPj@*)>mBmxrTp>0;i#bvk z&(>z8iMeGAeEUf67o1&YHmVyhBops341$IqF>9;jawZWkK>+w6AV*+wfx% z*#1|)zsRU>_$z~hWO>7G)R)<;A6`(~hTZUQ2BT#`!*2B7ARQigal>x-Q-g*-mCf*D zFkTafBGGXf^{>4j29T988~=a(U-0%Qn5e$t@2|bj28ueB+XS!ur^3n~xihLu$3TC^ z`*#bzRiHnt5yby!jOfo8zdmA<)&%EyM*Tm@_y3<{{cb_~3)*C}P@nSRWP1jT`ad=@ zB#rt87b4#nJGJOxS#ggnh)YUFbH(VtF@GLKp3!%;zU?Ps%Q$|+X6)D4cHu=!^=oDQ LzZeB2gX{la=)84o literal 0 HcmV?d00001 diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_group.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_group.co new file mode 100755 index 0000000000000000000000000000000000000000..bebe7e9b54368c2e50171176151b1439acc9da47 GIT binary patch literal 40952 zcmeI533yaRy7$lNPFQ3KG&%+~n6L(gkgx_s=o12VkR>c4D4H}`=s-w95>~Oh6T%`Q z#E6K9m_bBDMl%A6%8(YrqJl%B&bV-ASR5zLrt{6cvwU;E>F@tPRR>Nt37zHM`#f_8 zo~QVAy>Hcf>eQ(|sd~?Qsy*W;O|Yn{Jxme*tjc*sm7fW_{JeaJ_%Jn`vP7jF{_m`G zRN6qyx^|(yiau$~U^`ikV&j-n6aa5eku|7v5qZ`_6BMPF$SXqvS-C{y4Vvq*t(DuP z{%{}*L{>>}D?iBNS&(f?J)^#He$#(doXPWR>hX*|pN4uoUUUEPSbBf5!TcA*-nR>} zL8I?^x)IPtgWBZeds6RHl+xFni!xnB&bWnHg?U-IMNe*gDb86qzv!HjxAwI-XGZZ7 zXKtD^BO|M*XkTHLGc#>jVNP)tG?|%&kfjwBI5V>L%+JlA=gfU6F3-6r>xa`{j?2%^ zF3KuaUWzNqS((-D)wre3+$C9Q3v==^eZHgO=nIBCXsqmlLoju*0#js_|<+DJK+_Y`F~bDf38 zaZ`nfFk|5ID-g$f)*GscR+C~>o0F*{Z%cfziZ~@(?9KPL4EW2pDt^yf%UC5fWL~( z9s|1^SGu6Q1@!~EU`uNaY-_E79W5}hE9puXRJNdgKo{(8t%1F*HL$-W2Ienz8aIN| zRV^`KnjD=z*jfXJTWjD*3k>X8m}b5op7yoCM!-<0X{~`%tu=7E1qOC6Of%gQ&bGip zKqq|KS_7ZA*1)+I7(geOZVKmHU?HFrzHY67`qmowT?-7nyr^hlT7E(CitD zFcjDwLtAYjvDFqvx4^=_#iBd%mY7C|y#yCb>P796d%FsTI=_ACvUU7b-=TVN!x zD`vLZ!t7RCnA-viyO)|~SZCDq7MKX^h|E@7aJAY(ZVN1+Bd*M{&Zq?~FcH`h#jUom zyww&~wZOuj!k{mQw*^K5yP~Ys7S^}g!p0U@*j*U#6P94|ie8~Gn|Fj3D40Ga9aN*#Q*^6AxwCrV>X|BwKk%N~f3{Fg&mz^*? zO|Lip-ME6noTbjaB@bRL__8NCuOQ%UF-Mbh~aH~`f)97 z#3@+Y_@XHYYvYS2+qCi70k3NDrGobM7GFB3++guB_QMByK|5FudcjK2hw@dFcd+<; zpcgz#Id~pa+FE?y19p&uzze%wK%dRxvvZ(y#PL8om`*)dKt1TCz7vi|Jy^+nw>G{i z&QlpeJztB-~$ zWk;>yH@H2%jn%i6+_qL#yw#Kall8uv>Yu7}ntz(k8U7hMtz|PSvTss+GjaG|OYZF? z^505^DKp>NBX#|LAap#&tovT_&c1(-w~yaZ;z51KI>+WPM_BdWZY!(q6YHpOC>xIU z8A5J7+IJwi>1f}0vf^l;GIIM-zw*pceuHlE-=w$4uRN~j+xy$g{ANAh$=^xlpVISP z{at1LIX&Oq-(An6_s}KY>b4zgJl?jaN-o}0gQj__{WZrs{KM~=dNHELb4giK=cutN zmz1@27f)K1izkogT*POKE?ZM~F#;_*@zKJPnh$c?e)!CzZ9hD@h&C>Ma9sV>$&b{M znv=Sm{d*C4EBLGMDQ?oM96st*-ahIdII6;nKJmxjrE@s?!_q&c!ja;0OqZ27A|t)t?%hjApzr#{$#NY#dcB=Gm5x9^_Dhhm zE?vCdu3byL%2D)ZHI9$gD#!47=h3_N9Ukkq9Ah8&`>xYAYwNU4Ym|#IWfw1X>Z=?< z*`FNkVL9PxW2y1jZ@caE!At<=PzKl;Uy(;)Wi`k9gyXzR9gd1|4xD#QgzHj7 zXV=$A>p4C;cX2to*jx_VjV^zOLE4M74{1NrYe-c{Zy+5+dK2j| z(mO~;kdEqh8|vT2`iJs{&X4kj+DG|+r~SU(4%O#wSyTN2#u~? z2II55=6I)zC&OJ|hljavFLiNztqymrb@&|>3huKG$ihnr!c$MrGK$cbPyzU+`0Efd}mA8XS|P> zIx1F&Ph2~(BxQ5Tz?6!V^+^1F(REO5K10%rpU>EQPuq)UBw1FK#e|Ss<`_~&2us(Pj>w`U5AKehm`e0Ai2Ya(V*q8OeSk?#cV0~}^>w|Fxm(SxL zU)`!G@zILTp?1N?gAVG!Nz_LvN-Fi>OzOee)Pr*si*7ePS}@Zt z=t>vNr5<()s0WLw2bWV1uA&}xywrnb)Pw7(2RCvq)Dzb|T6yb}c-3+XuXiV0FXo4~ zFTKqe2h1mSExnEU&Qd?Rd!XJPBlHncpWHuC??@2(E>b^fWS~A(6Z)=FKQkpzKYNq^M?2VIKDT?Iz92^EInJps?jNXM zo*?ub_tdW%8L0PaLeK93^<^o6`t{RcP3*3^A@M1v7nuuJeMq)Wc2z^_dT>4U;N@{;$N8DZ74t*eTcV6{#e8B{OO!FLm>=CeP;ZYBdX6jVlluqi9SK6u zaYg;4k%9VDP3SqUsGpe=ou+U6b_lxKZkJy9eqEVub!CsW0vys9&BS^godLRU-rSUM(q#$0vtUeIi63_0!=mHJYtUk&0QmVeKr+-lG+RR3;i~!e-<1-`5c=% zkbF?+w@dwVU>xNS+0;Q~n$TBD{SU!-%AGbffy@y4T~hxdIGFM*n+ms0?S=V5zgOyC z0f$naV^fEb3x$54)V~T2r+kr39YN*`eU;Sz7)+#mu}vLG776_uQhxv(Mfnn&dKbA& z=nqT%+u&%*SJ>3M$w!3#9jSj897Fk|HgzoNxkcqTxLBfIH4g05m)KN|TrKRF$3YV1 zYi#N`a-Gnd$3ZgXkJ;4mXpf-_$gob|fk>;r-iyd^mIUBQQ|LF+jW?yhd8 z-yb^X`NLOV-AsSO@2z#7=r6zM5PkYghi|mgo@nKx+Gwq6T?`ll_5^!^y}({zZ?HGm z2kZm(1^a^izObT1&#(sgLi{>gJZxk;8<`hXb0_} z25Mjum;{ak$AQUUGB_R_4^99lfcJp+fDX_BrhqBnL~tT_FL*CF37iB@1}B44z$xHV za4MJzrh?PJY2b8lIyeKI0nP+xg7<;D<)3c7pwEGD5AZ5?zcps;n3B6khgDm; zl~sqAhE;ppmsKxU#QKhN@N~>ojJaQ1%&lUyBzI3O*-bgt7Ezp|q7TYBM+LWVjtXw$ z92MNrOPipV#k%A2e3A1R&fAUoV{gr&m%&;@w}-iDCGCN`X%E~>d*J>)+C;r9)+zct zjr04y+9Y>BZHg|(dPTPpr6^Uj0Uo3c@GxzFM`E>9H`|Z3OhB8=YvBIcbiEALH&L7? z<6L~4CxbPdCxfRrPXGq-&rPR=US%t8^LGR#9U$x7)6H-I`Xa%dozRO47>Q`){44U+v=XAZF5i7w(Byix1y$KJKR&Xo$gev zQkP-P6*W!U<({tXcF)lExL3+?%eAl<=U#4F3(wT{y6@BWxhdb$-qJ%+PJD&`;^Al~ z2YcJ;9O}J6=kQtDe!c9ru9hCgI5pNcM*e>7HNDJ^?y%QYua~HF(>co9UFT@;tvbix z{-c+zj6qq9f3Ab(Xm7Y5&<^VI-Tf^+?4nI}yJ%CAPqZo7C)zaOLG4Yw?B0aHHgU~l zv}vw(STD1GB|J**vZr)fuZ`N0&?Pq%Yv zgf@ZG*-zk1_7gbko7&nAG1{kA_Uqwkf$id2%xG87&pxyELSnfib3}ik{%rOa_yGG0 zoXh?KAMTB^eUQpg-iPbh7oYnf#Uk}bx&!G>B(%w*qz*#l5^ zpe%o?Yf_Imd>(`pkCcEk7-ByOG8qjYZ;|BU*9QMUS&6Ec1CnQjZwqd){Ngy31A9 z`(6uPUw75@ooc~4%T*UU!=m&7@3`NBb(O1bz=NXRxsgdd`XL{eX2E*ORTrOOQTl^} z=UcFTa@7r8Xi@G2hv!?cUUJnX7KwW2wWJ;cksr0pf_0LsZuBD-We_;VLyw-~_F8`C z`HlUJPwbyrBgrJmWXTDV4#|m2j{ z?Hf(&@Xfe!9e%^P+Gqc!_ig>Oq#l2$t+jq*{n!(a_S;umlmswoodxSRS6%Ys7Cd%$ z)lJxJQMkV5-@c*9JD##A!@!BpSg=lW)lGWNqKp8iykNn)%vG2AqD2`APJbEoz?rXF zl)J!LuUoM0a@EZ~U{US{A9%}xb(X7c?zMcHNl@h9Jmw?^(JQ>>AD$jR2o zz2r1&Mb^l}dj-q#mLC59wTw zyk}FL9mxSKD?kuGIS2neX{I$@V*PX?be``~*oki`1-wM5X-MN(V-`mt>T-a`WyE@c&S&r zsrYi%Ua;PJl^?2<^6+lzYO<}+o5w*J2Fh>hran&IEcE7au#xia-P9+@TZP^{4mMF9)lJ<@-Y)d!aZpZqOgHsO zvX{`C$3X?T>d9fJ~`!BXS+C`O1n6pT|RL>yM5w(_R!AC zzleIP{x0e{rX347ANrfk=i~N^@?Ps(db=jg!Ewa-?6r&Y+2<4Iv)?Dq=QSRuwC!8E zKd7w3TWx(fANbuPC|?^PPzHCfxA%P?wpK6v69(IzC+M|}_+2qSD_M$iOHpnaxJ;Jc zzXHd>v(;O%o`@E_>Y43wlwXeWE5H>X$6=Mz60s8bmB>E=J_4=+SAma$kAiN{4SGNi z$a)8JFn)0y^k1XTfW0X1Mfno2MAkdJ*b-5Sd@1s)!PQ_HSO%^E*MMumwct8%9mwMy zSs{*x{%iD^@_LkCkMfU!kAWP+K4((I2IMy&|2X(KxDnh4J^?-fZUQ%fo59T>>(%5W zMU*37j{KA0lVAl{0X_vj1#SVifKP)@gRFOIaZ4a3}Zz_yU-&`LMpd;``#fmB%~10^iZjf&Q;($j$yQr*_Oe zPpffL&S&dh_>?eC8ufo=XeacteC}?ng@gTHnc6A644=i*9>zhV{;w?UL%po=oSy!# zZ0)przV@*$=kt2nz_o4E|K-xo>Sg&X-`pm%|0_qU)64Mrz85|voMWT@uLattdRgPy zKmA_|wV%0jwa;}qV*qFa=hdkHYmxS)UY79z<~EuAUwPU&y$oXpXbfKAU-{>;N6D-w!=U%4$!M$AjUY9YZV1@R7 z+$*(zcR!;2S(h=cV3qb)_oLeXb-T6y)Mbn<@M!;Vdo{&VqFJyGed`m(7nEvYp4D0# zPnp(Emodg*jdp`)t=7@APV2187-z6v>*9G#vw1dXH|jFR8a%H3z_U?{^gN;6qRSX> zut~ekvsvrmDc7QP8DkEf)Ova2sJy z{a;(P0eV^ZzXJS`j2kho&of$_UIzZJs8If|ZCbou7XB~YM@s+Kv)W+Kc5SFGhyN=o zl>h5FZMa?*{;z;Gnf+fov_!oO{9jR_{9n&&qx7=yf9Y+a|7)i<+Vg@oMwi3?6~%tK zJ%s-l>Ho^c=lMu3q#UFL zNDGmuhyP3OJJ@T~|CNifi)4BDzoPQ+IUlJ2X)#hEQV|mE!T+WA9qcvg|0+h=C9*vH zUr|f(c^T4jq!majksd*!J@~((R^jubNNyw#66a#+@Ry{&Z&6qV{;wz}^7BqwlzCw0 zM;0Xm%>JoG$pl@WSd=Vq!Oty9HkkXBs0aU7)O_UgerZu$V8JDek^>h0+M+A~i+^iT z7J^H^vnaXX@;`}s@P9=uLVo36EJ_}@>hBgMA9VXIXxbJ3m-jQ_|00e6nI{>%HncwONCiVEfbispJ)yB>d& zz<$UO{v)&BV=(1e*W+)J*bi&2$KU*KiNCqZ|Mec$&q4mLhT?K6abFqw0m?SP|7DKf zVZFrKz%_la|H~ZT!}6oQz;T-3|1!q`RiU2!J*=H^E{6Z>T5&-vpZr17{9oqyA=Y!$ z2CnIY{a@zzB9@=@MbrFW<~SrD>ZQIX{87z`M`FF1A2iMXWsX~7z1g+mcukF8Vx75P zG}Hg}-ZlJRA2h@NRV(~oq2i;?qK&yuvjE;CIFB{|b)PV)-3^6UVuh|0_7Ai(|6#g7~h`{}n36i{*EJ zi}J$H)u{g~I2Mfc_Wm*OyJGf#4Tn#}94p51`~QaHgz$d_$CPm#R$Z_}G&i=4^$va; z+W!?CgT{J?|7eM5ZY&z>9r;^m|5tF#+F8`|T}X;(ZtNQC)qI;2(cBm|);smbq=;F# zKFx_`qyMWZ@oZ-Q7ms!NZ%Gj)i2d^ZxwOIfuTXw2#)YNB4r9W2|7WZii( zL!0O;W1LuU{1^M`T7IqII52bD%>J_A_%HU^wftK~d{{Z!$at{eII-aPFZSQH{98tR z80|40EI3Z=di^J^l;+nd|Xidfb*d?&o^^*L!FA9^xkY@2H?^|6=@54?M%aRy>zE zF02aY!#FU`^$fq6IZlk{(@^{uvBu)R z=x4f?KdZ4gF!R_l`^_4S{|e#H3XTiocSVl_!}-u}791zmSo~Kge^z60VCM5N`^_4Q z{|Xhy6&xSNvCH_cP;p|7#eea5QP+s$3XTus@fiOVDo*TQHU7&N)^Pk+DE}AZ!lKc) zj0xj?oc=GyiJ9ZSLixYUabO&G4ei}yj{gef|6*Ji+r*eKwu$~P#)+BZze4%H%yD4m zHktij=J>Br{x8Obu}zE#W1HyzVw{*c{wtLK%Nz%0Zj;&nbv^#8Y5p(9h1qdE^q4SQ z5Bk3tCuWZS3g!PY$AOuzhuQySj{gef|6*Ji+r*eKjuHC57$;_q{|e>*GRJ|L+hq2C z1;l@a@P9EbjBV0m!q6uAzZfS*|Cc%bE0q5$I1bF*HnaaLIQ}b?|I3IEOGO(Q4;CCJ z799T-%Kv4=htVG6!Ghz&uE&1~|JU{Suj}z&!Tv9PXRgP8UEhCc4E;WjUf+Ld3j5*t z>-#UIu^+JiQiJhdP4s^;F02yw7d;LP^AN-Tb*=rISf24;P4Ry<6bHsRN@M=7hT^~I zTM6a=GRK8+P0u(m&h-rc*R}SKVtK}YHO2qcP#l=~e1iR74aI*o#s6iF5A&gYjQ^rP zsyX{su^!{Un&SU5$A_^VW59U4Q2Sh+McH)3e>K_vg?O+={a=XxYO?38UGc^|J7I=nE8Cn{;$U3ze4%Hg5$$Db{YRQ8t2iRJ;+#(@n50* zU%~NVtjGATWE`(KdzSHg^S?d*OI1Yt*J!`P>w&YJzo$^*z@FPBVb!&2S#|N{J<#;~ zd%4^@sY33Zv_J~s9a0?;m0ui6o&tyF~5W1PiV}K(AfK9*LrWPx$lcL_dT(HhWEo7dM~W0?}J@` z4-ESjJ^kOa4-)re=Uk#3?{<(|@ZJWw4fAkv2i~vX-{wlZFQNZU$NX3STa9@wxgYN< z_`oW>e?T6@e3Cqj`5}1(^S!g64|6%P2JZ7hD(h)ZoO9<{N32(PvOU~~5! zZ0>%8{|x&KUhD5@sC@;m?_@OM;`_w5vU-<~G#+jHc7Ta))~#8Fk@e!F*?yl>Bu z_iatyw^QVOdz!p&&yn|S#02U0ZN&TN_w8x&zKyt@>wib}7^jZ|{*F53^~jOeW3jv* zE9CWX%IlFMug79}Jyyu;;gr`SM_!M`@_MY0*TX5VM~=K6i{ajQkM(ghr`;FGi{YLAt-)JTFYp5J&RelKWx(E9>uYmjRw<`O<{SR7|SHaiPMA?0ZQa!I8n(o4j?sHl%_c^VX`<&LxeNO9_h<#4$?UIF#-=aA=4{>-pe7sprE3 zWuA`?to3-=ZeO^UY;!8y%Y9wz<-V@NRu*>u1f!&_34(##x*zU9FXSVzEaBq~iO?4Y(O|?Ur zaQ3_Rt;%zU|`#1Zj{37Of06GmnX9h{jlctpmq zdBZakhiAkO8$4`Y#)!=LgzOQ6NA$nk?5jSgx}>eLZEv8eqm-BL4wT<45g2N@N@Q-DY-6L%-Xs#O37|XDM-+EAsHG(@Na@yd`liXOT;h z|5ILEsKgcKXF7|WN?dW)@?yQTb5TwP#96t8`Nj`{D{&e5i}<753V*@6*f|e$p|l7?sQef|G|BjwkeD>ikWLtK`QxxF(!^w=Y<%)a<72|$VV6IS zNVxn_8#$!uKN;QhlKgS$wB$Kcw8<%A90XpAmgG3T+ag(v%EkxD-nSbHLF0RGNLiQn z5~G@Ne=+Eg1$lqrvD(SJao>qR{?C4YzER(}zZmQ-%Nu&5z7#9n@qxAty>UM>7%dAL zdZYaYd3wC#8hYdYXVAF+Ni+I67@x_0NIWm2zP%@<%2mHtE~JnfA0!8XtZ&?(FaQ2| zFHxWp8&?PXV`0<3Vff6tJO}~KR>;H`VyU~ALq#W@V`ZY1?cX?YVx;!iR z{pE&B@1Hur!X1qH^$GykQr?^ g*N=Tp Date: Wed, 17 Dec 2025 17:20:23 +0800 Subject: [PATCH 2/4] update --- hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16.co | Bin 40824 -> 40824 bytes .../fmha_v3_fwd/fwd_hd192x128_bf16_causal.co | Bin 46520 -> 46520 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16.co index 82ae7f0fddf05bec08f0541bc75503abde202795..47a00d6fcc62827ed314fc89100fc8bbc0859621 100755 GIT binary patch delta 20 ccmeydkLkxgrVR=ztjr9}` Date: Wed, 17 Dec 2025 18:16:22 +0800 Subject: [PATCH 3/4] update2 --- .../fmha_v3_fwd/fwd_hd192_hd128_bf16.co | Bin 40824 -> 40824 bytes .../fwd_hd192_hd128_bf16_causal.co | Bin 46768 -> 46520 bytes .../fwd_hd192_hd128_bf16_causal_group.co | Bin 46944 -> 46640 bytes .../fmha_v3_fwd/fwd_hd192_hd128_bf16_group.co | Bin 40952 -> 40952 bytes hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16.co | Bin 40824 -> 0 bytes .../fmha_v3_fwd/fwd_hd192x128_bf16_causal.co | Bin 46520 -> 0 bytes .../fwd_hd192x128_bf16_causal_group.co | Bin 46640 -> 0 bytes .../fmha_v3_fwd/fwd_hd192x128_bf16_group.co | Bin 40952 -> 0 bytes 8 files changed, 0 insertions(+), 0 deletions(-) delete mode 100755 hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16.co delete mode 100755 hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_causal.co delete mode 100755 hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_causal_group.co delete mode 100755 hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_group.co diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16.co index 482d44bd2122541ccd6670eb6f2a7bd15c188626..47a00d6fcc62827ed314fc89100fc8bbc0859621 100755 GIT binary patch delta 1142 zcma)*-%Aux6vywqyXKA*HI?jcS~}&r`jRuVKV}vJSuxuMk)#JJi2XILO7=T3ddOYd zdWuL-!TMu21EZ*yLL#5G9_*o)&|WjJ`U4_J5Q6H?&YiVQ3NPIGo^wBE&i!@nT7j$; z$eqQYIE0GLRJ6nJF_kT}i&?`L=galH;i5(0v7O%#1p(|9HjJ_y>;I;4ALpnuQaRuo z-i0l5739dingfxOUB1}B<=%_3CwZnn7D*0kk$z1I$X8&XzNe<~Km22T4@4)da7fsV zUC=GKM|e5|AqeT4T4;OuQ0V$jEY#M!-Y6=vqGNvkJ%c2xk$6(_A4Z#qIe1#zYNph6 z?uXzaH5PQeK+tHxiZTUDMb)*D*SB8PrfY9vlmbt~i*01Vg1(>|?>}#+tAo{sW7VWN zJRS;%QX!iymEyQjt1mQ8;FQab2@JWO(t{HkADyN0_HLgAJ3z|TNwb16a}`@aC)3rR zzs~7qlyNH?&plqJtfoMQ8I&y!jeht-r(Q0)AC1&Z>GRtdFr@P=F_$Gx=&2Ea1M5z4j7wO zIP-F&utdEKV;SIc3U3yMsh!Y0Q4L>n91Q1TL>ywSlPf)Y;PcAP8TNo@vc{1{E8#FFR%PZ$(umVJ3#AeasBa`&bxS_A#?^ z)zu&9nQX1Hm6!PlGs9zs)=Q80fv%eTS0$K{d2*ntuq3mI=OhnLj|LABk%k6Q(I#&l zk4Y>Z9+Q|S*Q%~&WZJB$wmc6=U7V86%&0Kga)!Lo8bydNxS*a1fYM+8{m%z6GobP; zQ1KqciH%~D7t9b~GBTLFX-2W8g;8Q^ib<-OWs12$QfiuUvV~=`u|=X$YN~~4vPH6q zp^>FYN?Nj|=H!KpQow*_VP#-oXONg&I8$6Q!WC*NloAKi>7}SZ~IVlbDp66K`N*31b>sSh~VQ-3;MOQ-r966HKjvi#d#GXk-j$nz_N5j^?IN z!wg+aEYMSoF-i(2E@4f6FgpU0%2_7c%#mS~nH)JspHl%EvkH?Z&N1hN#dF5wi$Gov zRIX>T;9PS-SQ>%R0h1LKg(ttA%RRY#ZX%OLDU>VC3D5kK1?QOy&46a(8PK93gMISt nxw4bX=Secom^^WwJ)_6ui$HS2WWo9NoC>ocL4OHmHX{Q7sC^A? diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_causal.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_causal.co index 5f994f73b1698b07eb97b602421994eec699a740..30e473596989e1bdb941733bdb20414dfd344432 100755 GIT binary patch delta 1390 zcmb7^T}TvB6vywqv$i{;=+=ssm9DtCTH(yByR%Conjg(9LPM`V7HE$I=R60BNVPnnwNoXBLseOJz~+}G2)y$YV*e8iG2g{`&prl%B?@Z=-w? z@JtoqLr^nwraBrFxgf9f2z)47Cn!-tjQYC*N-z-R{DQwLDk>b`BMM?Q+`*f)H|W^- zs*d)dl9yFGx&=609Jf8Px)szN3|sk5%9RIJ9;DoM$;z84zuRu*MrlWNku29=rV)p- zVn@n(lNhQ{K6%Z`vs;Di=;>W6@2B&hJhbv5%JY_1avwrp^_n=4`Ei=|xR$JE>osU&5p;#I@H(M)!i52xPVAgfp6wb>TfkuzLgJA$_qi0a)DNpa=W<}a{||5Qa;e+ zH)*qIUdFezaXPKgRu+V2ANt=D z-XzF#qSHLLrglJTE{`4TcZ4KL;!ih?tS daRVEguSo%HS=dfqr6Ih~z&6er#!qdHvcHxhB>eyY delta 1689 zcmbW1O>7%Q6vt<0HzXSoz-g^Gw4rrWCj^{kcjNdYiAwFn?W`-cRB&iPitE}=gD9c} zL?fv1tW(EGEhO+N?`+bgs0W@j z`+xI$Z{F_A&dlns@cM6X{&6T(7M{2oE?xu;vlKjH_BkpK&{kYB`^9hVddBR6W5mIA zIPS?JA+Fbb4`nsB#%kOt(YwnmQVhG`wD(DEpTpP&hyuqJE1q2aBd36@ez&U=MpXF7atnB%wjsKblzkuBLKR-;r zoGf^r>F4{@0LTwa}=11R!B-eA9 zgO#q{%QIA#KYpIia3HecsUw`BlSg?phqA4DHWJBs>S}|j2GiCtf>~VeA1+{;SC>W_ zs_l%;#xy~12RXgRb5qLzbw$mq^H4=vk9HGNe-0UQRZ$LMLC1*daIk7ResUpCxn78e*_Bxjd;p(2qFmR{`(Dv5N;m-dHJ0qJCVaxfVTB?p5EC7liq zC4)(ytb}~2$z-UHe8fFUo-Va?R(-4vlLs5x3@|xc>d2h%+oRnqy0K>G{ftk=>^#Ug z`ozx17~eQ==Z6`8+jNZIXxj0HGy_-MBBrJoN3Yv?W2rzhn1093-(dO0D|Vh^{7U(5 zo&?5`qt*=Wd~tUGfDakJP_y%^jKB9U{*ZAXVGFQsA#2nP5_WfRKa#cU^BMm*IGs3| zJ{^}NzqIL&2brtLk+4OF#w}Nl$_k^h60oS`k6KiUNPa6p8n>wI4@E6H=5Gv((l^To zq9KMAB{H_@M&&Kn|G(}PXL*8NW&$~1-pP5%`{e;qVmnzP-<1QRwXdg%qY@AoS!|IU ztpps_nXq(%giB5`P!-8aB>{^US+`TPR<}rwRs#;#EpwG!B~v%HhE+R7iV{4nn3`6@SAOO|J z1Y{DU52}B%Kck=$VLK)_F{W{v$b&6nFq!<3QJ7J7Gb@w3oU}oFl%Hv0W=U$1u~}Mf zMq+$gc}jdnilL=Z#pH*IVw39?{G^TXtC^%2FmVFQgS|U>;WiN|yoM?YV0Dd_k~q++&5lZ+8AVk8dp6VqP4sN{==W$m z(cr<-(lFUXs=Uf5|dJM;tdQe44_OSBNsOq)6fO3z|;-KG;}pKg(@&K zvV@5mm>a-E4V;WDVG0b~V4{X*&W12iXA6in#hiE}0|RFUauVak&H0QblO4DG2c|5? zt@TVT-9Soi@`J4cTreMZtegC6tC`>rXy*U14iW?$lO4CqPfpk-!SrL@$ZuYOWoAcM~Y&^M>Lv{&NNok zU?|*86XU`K#KeR`OM{~6=nmMpbz{QP1@RFR!*0Btxf5Dqg7GGI{&T)_&OMWpJF~h+ z?|!3`7J4_kaArAXnFGqw6jZA|h2VnTiwK^L6|58Wl(Kw|y9o+-P zsmO7@P3^^_pHwYn%$C!mtu37Y@|Ytqc_)0!#z(G41OCXpkUtu-x;Xqq)uuW$V;F03 z9Ff(CWW#xM!Pr3A(WLQoBW+MxT`5cfJWbmBU!D71hRrf`Ggps3q|2L+IIs>)XHL}o z^^Ll-1EyaTg6;cN6o4C0g!2q!r2E-nv-?Sm0-g=C422l(24~?sj8{r=k0)W4y$bUd zn7AcC=Aj^W_bv)JPiEbUip{=9K9P}XKtk#;Z;LVD-SEzOlZ+RN9pbit?Rn^BBMHuUGD>qSs!ZSRxNw+v1o@!d%DL>J zNYK@ZUQ$hXpt_jBfe$WPCl$K_?AePTHtPvD;LgR%MEI_=m;%sEcyzOr^Mrr(DMn%5 z>Kas}0&Kn3h^k|RM`ud;CBk*%r94D<%3sQF5WaP{nB)ErNMQe2aR7h>;g+RRo+P|I zTgsmkKAkV+%Y-wBCbw4LU7!Lq9155V*Pqq_?b}!C@#8(ZXsoBcqt3O37!Rfb6X7X8 z%l3GP$=TQgI-;4Jb!b4#b&P1Hb=bxcim!x5*&bG-tcP=I3G9eQtxnIdLdU24)3k~wZ@P%~|;^}uw3|NnHG&{F0a#Up)I#@AiH9-}RDSX$BQdUd5XB}*FZtUL87 z^21D})v-l!VjE&}4Rk7ve7U|;a8(WLD?b?ks<2y&UgWx{4BE-{>oRKiEYj!N{s6Zk BLHz&# diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_group.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_group.co index 794fd00158131ef67e9b9c08e9549e4ea7f0c92d..bebe7e9b54368c2e50171176151b1439acc9da47 100755 GIT binary patch delta 380 zcmeydpXtYbrVTF`IaTBtz(8g4M@C^r{>`jR?sC$a@lk%JiJ2v-MMjosxfzM^Y2_*L z87YRAMirAY6=J1z@W?+_h@CirWwL{!1f%w3!P(-I3ls%p8yFmbwsbHs0?BCw=kIK8 z(ve{XvO$<(@) zGns*G5C#E|I1`wKMBugKyrQHqHrM@BblL2vG`);5X7bG$?o37ola*!`Yg!m3rly#r znpvip8ziNs87Es#*=E7nT#Q(4!{v&wIdQ-cV41vUjtry5>wf_A}E?PxzLf2gd|+W?k0qb z$Uuy!h=>_vltI?X2q-EGEfWM4ToRobZ@a_AapUaFI`hr!T)y3Jvj2abQ*gRX=v;Qc z=b2slc}{*^?|aT$r%s)&ob%ROr~H$rO>yYDD@qd|Lp!JG@;l*@-~ara_;PnNWocR$ z{OzfA*E&JWvM!CM6|fHOvOA8B&7kCPPkW}PZocH}% zFDDh}=a=P`Yd=UTD_E7+<&~smp2DShS&Isaa{o?b&04TDKR>T@ohb5Ri6_@C)TNn1 zdl#(CvP{^P%2$@;y;v}EM0gc{XjQSesBq;KYdF*1S+8bx7O{r0%@q2tuHl__4da{H zS;QJ9HdE-ox`uP?8YVZhvxqfJYo<`EYuL(;_~NvCCfk*CHM75nm2@{#sKrXkuKZG2 zj;GL5YVUW3FcD>UT>b`_fHF8beXyej4tLbRk#-n(X;GGXJv<$3hmDY~P}fldr#fojbUO^}S(K$* z63({6LP#Tg+))Fcbkx8X?J$5wP%aAR+F>E25x(rGfyRy+_;ouByu7$uu=0F zy;nO7seR&%er<;>#CF)ijqR}T%92G{UOF3c3l?AQ9`Wt46xtpGI&2}a!xo0N!@|C0 z%```HI}C+3M{0*Hq;=TB_;y&>w?s5Y(Nd*%xY}VTv^g?5Y{A`O3)9+R;iY9EqpLIe zjCL3aZHn0)wlKHD7Us9Z!k%Ty24UyYn3*HV}C~SuXG{ogm))~E|9VS8> zqP)WvR&?0H>ULOosWj}z5om{z(59&Du!W5swy?P!7WR~e{5Y!GVIs64wsqLT_6}Rv z*$xY62=&KN(+(4%4Y8-g7WQ`7!v3~cXy1L4y(Q(PSxYcZy*wtX6?<*lEuJ&_;bw}p z=oarlWfz51HrUKwBi;l!qn~P~Sc{dlZv$Xr6Y^_2D~z~hun<>^}rQb|i zSX#Wa1XIX`i}Q-gv+@g8DrTBl8z~Jz4q7bF;j;DPu>hNEwlq zwIDxb^yIgbN=gfsdCK!H?e}uUw1Y{dc}s9RImG^x*nzIUl2o>^q%M9yE)~Y1{3vO;XIWl;X)&6GN9VHdcQ4B;Eh{K4syS>1m(-CyBBXr09Ko^)zJy=3L7@)of_D4Nf!|gtu zg0)}(404LItEw6=RU^_JVuS=)V^`dZsK3REp{taJA| zf1vZJH5cxv^K-kY?nJl0Hu>+q5L4&BsI6;o*BRPHZGFRqlZJNTl!Y^ zphoY0uxL%)dj*}}f9BE7@1I;u8yDU?q5tya2l|@2la`#vc>&u7_{-?&J~E&kJ{r*8 zJlZsDTy+3V+mw8p#i(1{)s7*V)$UApz}=-wAkejIMbs$BQl;$LYXgDre5c~hq3-G^ zH{?SztGQ3)U*nFA4Fvl3t%w@#t{$8u^LOta2=wSt5j6s`6e;V~D-h`2yCR?+MY|qD zJzzmQj_h{7p^%rhwzL|3b9OcQJZ`@4{r!reB{p(e3LKA6;kMI;ncCR+oTw z8Jzg;$sTC8gv3+t^{7GGpw$hotUEC<8vUUH$2G}~%n&#I2E8N1$o->Og!i=z8|_TF2&ap^oWD8Awx* z+(=WArXfv7nt?P6X*SXvq`648NG5x%O?T zeJF44_$Y6#ew6=r+8-3?cI=Dm*ByHSeT{vv34M_LgZ;1xeUSZW9r|Zg-H9F-PDXpb zjE?f++U8~dS{vJ-3hDD zbAK$q2HTMJz_q4FFYNP%6JxwLoE+oDZzHT;ClbQ*1V)ecHjPg62I8@=v~dtmg8hppZntc2zJFyPw;Xq!ryEK@~nIJ ztNsHoo;X<`e$OWcUp#T5U>NLg(0UH4?Add`1^i~$u+4ZquW(ndjh?!G>YB{0 znZq)xGdCjf_l@6Y74`UCH}w>Kzp0n#t4+@DT79A|=>mHhl zYgPYho`58lY~U_X{eGsLqzcr(j`16Up$#PVPw%Y(PDJUEo)!K9K)$MM(CZ$MaVyk;@g zC76~iI9>y9#BhdsFoSx~O+7e``Z!ISK|MH|dT=iF;C#(t+0Bj@%ykKRvjq#Ohn*7Y z!E)-s71V>PsfV2a^U%Io)avj9(GEo2g|7kS5ObGrXF?z)Pt4OgBz&_ zFZC-Ij!*4Z+)nN4h_m|@x6^t%;_QCK?eTp>^{xb=XTPF8V@Rmpog(z?SJY1%8>*jS z3O)N3^|LcW^>b$mJ^K~)^XFaG&%ieORexR^agAg>`)E;PDbMYiZXE|M_8Panz05`-bXE5`_LcQeQqKRKFrc=)WuVtH*}w17><0_fwfExN)Z7=6UIH?DqrY_4_)M zXZx~VtZ!`CcHgk=>y_=>#*SSx!HhZeqt!S6zR8vQzI*-XZiGrT#H+0OgrZ9e#%T^LGmUCaHfM97OqD zPJJ+Wx6oHf{S#m!#^PD=|D)r|V3jJQG{~?%4d4W?O zMJ^KheNz7lIGXasPJIknEcCTf|6?$X@+D4vELkS>uSxv@a2(}Jo%(I$a-lyg^>2dX zDPQT-ZzmrS`nRP1ZEynRk2>{oU-Q){Gul9o(lz-o;&m>ZOUhhoae23tGI|T>bEtq(Z z;4SwF4t+o{DN8UpM{vYK!PG^9ql*R8$^^$P7aaeH-~_**Ypr1V2EmNS1*dEkbU!IL z^%=ow&k0U{L2$;4g0o&0oc)U6oF56!Js^1h8-nxS7JTR!Xnf(v)z!!Jn?qmt|Mm0F zucN==H%5a${I8aNgl3yuTFfwzITf#bpP;O*e;-~@02I1!u(xw^!Rg@L;N9R1a0WON zoC(eXXMy*C_kgp(+2Fn4z2F>h4tO7UA2=7B3(f=Qf%k*=gAafYfb+rm;Dg|U;6va; zU>2AK#+xpm@6ud>em*g-<9PoDP5YahFIZzBTmu5y?Qcw&IAP803A7oDR z4K}A+a?DpO8*!ReOB>)p+5ivJ26!aVoZ(~rF_#IclR5_;V$QPiV15(FaWal2$Z;}Q z$8j=visNMP^eyIWD=+3gR-HI!xYeBF8*0wAD7ne9t47x+e)xt0ub zsko75zAx4E`bL=#TQbb2;zpZ=zA5mWSCRMjWtVsE=$~-R3Ue470|PVa^pd z)7Ip%&V@Alq~es({#=QsBDedeoHo}GPRueVh$P3vQET%fPT@qrsGPQdlY%3G6w zyy*X&2hB5I^WASAwB&n+IQqFnom?(ar}UtxQ$|qKY03lU>sH>qDWP@ZoXM`!eDkoC zXa88(6LoUCM4hGvMV+PvMV+QUXuf6Ttu;lRIJbJpJmSkTk6QABnWCL;;?xLr0%x(E zz}ajkaL!lt_1zN8j}5l#;hCZJ;#|zG*UdlusnH*a`HsvJ?S=Al*cUGz94uq+5}wuj`s_wH?Z3v+ckIY&$TQZ3pHLMc!dD|Eb>T z{gUu~I8riF3epIqkw~<6x-aaZ?NH9kwgVq#+ku5_JFqAfc}L0oXA{!<-=Sx0B{#>-*mznAPeuc5xSEkwR zt4Wi({fc9?pZ=rOwv98>`~7Quz44Xtp+6b*cdd13DPZ~r2j*|yhK$D@xGnB&n6lNO zaemGJeW}=XKk3j$fm5GxV4mh}nD(4Q8v{;%!GU?1w_(PM4s9$r>t&P!XTRdmZUg81 z$bos6w_)x9hju%7{~Hd>v%C%S-*#vd!H14HwB4>BfArO4BX$p&X~e!n-ett@C1)D3 z`^bBZ*!|=@BlcDDK_j-7^cb7_o1WOO4ng*Zs zyhWXNE};B`Q_mq!3cWh-%%%K2r=CZCAoS|IGoSJ^PJJQy4??fbJH3?GJM{wcBcWI4 zoexv~Q>VU&{JGGp^UgxbKXvN3Nn3yZbD>w~okf)Yqf;*?e6vx^4~f2mE`Y* zUY&QYqWs^S`Xl6@gkGI@uBQAiPW@5x--TYCcls#*e@@*`Hnr@32vD#0(edM~KX2Ud zEk9HR<&R|Gul9rWl=tYPZy;lYUhM}PDev7!e~j!S^lCrY zMEQ66=#P`v3BB46HdEfWkN!RK2BBB`!4}Hn`siE9n}lBN2UV0O^wFOn`wP9=4{$@i z{`|l``jg~fp;!CCHp++e(Vrr36?(NFJWctqKKe7{aG_WG!FI}1`smM+BZXe=2RkSq z)klAh93%86pPh?X%@EyrdxcP{*3&85aqldiE?Q(D|&J) zV*69+-?g`y|KXpD{Dq&3{Nv|gKlfVpwrlrU+!1)t;?6*p#a%2{^b1k0=0vYZCbb#v4|>rp?2C#PRHPiR0N96vwkaD30e#v^_!yq8e|c?sl8z$IWQSPGVbWitPs z0^}F>RAG74=@pGUQu^e9OV*GXMUS*bnZl z-hlZ;yx_O)*{(qT70AC5TnVxt)_NQcfUCjP;G^K9pbzwce$WrH+`$6$ zU+f3%*Wx|k0P+Wre+{@smOH$}5mSNf3T&?h*MgN`CAbb;2d)R#gB!pNAoq7E z!uAv36JRx14L%7z32p1wNqWrF*nS$@&w$T>+rjPNv*5Gf4sZwf9QYi_ zdtW>8eJ8#@4?Yj>0(XHgfG>dAW)Sn+%f2s;Te-i}EAboc8S4Lvhg|i4dCcR!1!kR( za^71Hz^8$9&hy%lq#3TsYkSm1~}|^6*|f?V%sE=>N(y-?#GG_vz{X$~RB@ z7MdSga^A0}4V>E+{a;@5td*Dd^3^)2{;vYF!OFw?`vLfraEvYbzaBO}w({Ed{^|c( zWd6)oXntbJ83RBYIIb4`UyIGpth|g5Q0t`nzlzK+tUQbvpgkO8i~g@-^PhalI7jq< zm6+#zOUw(tQu9ko##n+f^DAGu+2~tp{>qXuo?w~zYu|G7Uwtdg-&!)p6s$D=k8hRv z2j3&+A1xW<3Ratc_C0F;U!TwX4@<__0>AlJU%=G-YfJ~`p>KS|_<{;E%D>j^KtTV6iuQ$8Iljb15>i=?~4yylao0;f;%Dlx-Is9K}I%^EmqW|k@ zbEuUU{;v>!B;!Wx^Ye_EWaWYXD=w1%YrC0j<%R#t@{!X2^{hF>Gzx2(I|OC)%E6(>`36H<4sN;0&{lv|{S9p;SUuR#R)O1YHMB>- zr-vH|Nb@#qA8BZhf;+|-7+8B7cHU-aesI@BV^L+Qhd}|)Uoi>$o{YJII3^?zs`%_3H@J@VytSg&uw2LKm0i{|b-kVxO!zFMccZe?^M%V*Wi}Bfs!- zwdnr}j|F47y}t|nt*HL5(eR0=v0}`>|1a221pilfOd0!O?RiH`TVu;u?%>yv{a@iR zXe@X5caE60#-g#@k-tRte}%`aokh9e`Sh5!#;&nk-Ph?cZH-}Lxl_MOkC}t>)0S8^ z`oCHe&!+mnxUbWHNsn2B*ss7JE1He}isa{FTv#^jFeZ%Gf5wV2PE3veisai;%^Ea+M%zEabjxxS0vw-8V9Du{g6gSpf{1@Yg`r#h_mEyV7xUgCr597c% z*0cR)YMdC4r@8nq#uH&bSB&RsE)GmRo^ZcebMasFH)5=G#W*fCJ}ijiVf+{CeWf@t z>YI!I;(5aUT8!mlIW<0v`(q3k0*2s?J)i;Qk+<;;=dyJv%=%T_*=2!z|gnoHw%vwYbpMVex@t=vs#J+ zQ~Q?cH)}EeD}p~OJT8pC6)O%5$3wqac$`>E@n4brSuMqZsmG)G&0328iWJ8c9v{ZO z%lNNIabhjSe{p|tSBT>Zj}PPi82=S1PV9ec{8uol`S`C${x8Ob#iMN*6UOT}{a=g| zQ{%rP`M=aSF!sCV*6vZ`zasg+7#GGmF(!<4qW_C=Vru+XB>$Hh2d36Z^?#}HUy=M@ zj0A^na=GUy=M@;c;MU-BkZqc>Gr+|Cb#fHUo8JJXm;~Sa|$bB>$HkA4YqO z2MdoAyBhx`{9jk&zploAh5Ntwo4FeQb#?uv3H19sdUgG!>1>DRudct8#dg5@OU=fA zwbK8^xUd>rU#vJVj6-by*Ok_9Vt&SdwZ{L|TpSq3C@uNFnv4IUZzYodON|TToSt!D z9P8QsuPdz|#r%x_YK{M^xi~QOc*6Z(&BcGU#{Z?phXqkT#(&Wt)s}UuSdQ^ut?_@U z@nI~-7%=WH(mGdXkvALhU#<3kAs(zn{}^WuwZNF4 z@n4brUoFLfsmG)Gzgmj_isb(aj}K$tW&GE897kK$AY(bke?{_tg~x}n9OJ(-u)nsf zS;pVZ|Md7TT@&$NwA@4JE>Z(owQA^ zo%FQy7i^bnA?=WBA?=iFA?=d${~Ec@(QdiU(H^a}uBqSxe_LN+>s6KnBK69u( zYp6bBs6Jb$K2xYZOQ=3Ws6IQWJ~L=P7f62({W$c~*w5wB-$FkM{Ur8takjt1_G8$7 z3fuqClHbAhC$!{8XzBT}D?K;X*5}3A`kdI`!t-IxJr~y6=fSQ%2ZnWvp8D@u2Z?L4 zb1YGXXFJGkcy5E-j&V4-6VF%he{&6hrKf?zG4KOhfcJV_qL z_>eq;@!nZ5h_M`5hjAKt3S%hi84l{*Ojl zSMch3g8v`>kDBtjohh%|Gv#%Ap1f|G^16*Us#;uc_s*2p?RoOLZOZF*ro3*?l-KQf z^16+fAnUq~cpvM!JyTw{5w~;o|ELw?^kK;VQI9+y1@e3>k>_KjJRcr;J__XdSR&8I zN_jp!@_ZD?^RYypkCpO#c;xvgkmqBGJRd95V-op}y~MV?V{a1TF!}zy3o|usl6=Qr zBHpo=*vvcjC=Y$d-g@gDdy_^^s#f21#rJ$w;XPk`=hIfaYilIlSH<@|Mf2T*HTFB_ zx_J$}=PQ8u^b1;WusirpK{Rw^Mev=07qsa5uX|wO=1K4R-CFwDMjdY!#aN|r@>#z{ z_>3F%1pOQMD=tpl*m-0X%3jmm+lV#lYq0KSqiMu)z0pRw-e{v-Z?sXaH`*xI8*RK- ztT);y*BfobdZRU1uc2m=p?x3ReFxTU0QcN!XfJ{<-OVLBG~ez+F8_;%(*1iO+k20p z{Se%DpP}so_dj50uYj*+iM;y`&G5f+XqJEP0hj-W2h#of4rKUWIWPt9d0Txhx+Xe6 zcG?>q;5w&`a-Gvgxz1^$T<5e=u5;Qb*EwxmD%Lq|lxVkZym_>zkMLz?_#}cqY;OLdLNDsaDCNAxxQ+nTwk?OuCLlC*H>+n>#H`( zc}1gKU$qhIs|K;p;95g_2Ry#P(2j$3j~jR$oVVfKt)ec+4+Z=`IaJ|41=*=54edSf z{bvmAeem>ihV}vY;R_<~`-iIh9~`RopE?lmzjvU*|NenW{|5)w`va_ZFgieXIu#w@ zx~`3KUDrmru4|)Q*R@fu>)I&Sb!|K()^%-^>$)~#UDvbN=h+txtsZQ6+0Yumk6$sg zkHDY(NYtg_P>uhiL%aQ-K=#Q2L;Do`>3r(+`TXNPM2pC5Y7|H*+G z|EC9b`#(Fd$N%|(m;6E2`z+d-_5LI}5Etlltdq9xm|L52_M4v=+Bdv^@{9hu)~n>) zxo+^4aZ7SxMY^jeDVy#kc%gsxeR#3Dk zuXJ3zc~?gKv}qafDPxn8lak{ThZhzXEzB#$yOqnmeED!zS#egm7cVC+Em>TamE$SO zDqC7oQe0Y&965_T3-Quu=<~92^A;>!m{m}eU!0X+T$<%6EX*n{%PJ`?&Mzp;D;r)` znlrqxaM|KvkUX51Q*Ld<=cJ8EPD{znACWRPCv`+_&WJHNqZW+LO>)JZi+K1vz7K zlT-4?j2JWIQnkPJMYklKwfH@us_s&rJvmf{)kT>^UWi6?wxI9lw%3WE6cR_1O3yYQ}c|B!bP5!04yi`jnEzb3n zd$gqTycOkEZqMR^9EkG@ON;Fnb8AUC#f$lpY>n@gF83?|y(lfd32KF&1$l*8DU(vu z!{1pv~)Xfa>f|@Ys!d`m%fgka_Or%W{kbJj8wBVFLrKC z9=`lJD`Vbt^R7&B?nB$Zty}ERosk@NLHmnj>`k^p(Egp49*?POY6l;C)g|=w{C|KUU9$$JNd4EwN84Fd1e3)7JjKX)8 zG1>LE`_Ds=*>%U)OP|9##8>82 zvhJ)C%+%H{+@3tVQ)!0v%R@}bAMBsP21Pf^_i=kgLbsO*8b7D`2M5;(_aYV z?-pj0=DzO?b8q-!LH+*p>9ZeVto#kv@@#jBD`8nqaY0Ug$urx3l;A2}TJjYu*z`t% zE30&+E8pqL%E~DzIb59M%66_U&MVDbqfHJKHa4XZyu%X#ZMC&I)IKPJy`(GTYFHx(~A3+1}mG z_K0f~##(g@4?dW)qQu+*MQ!Ln-2tWTY`>Qeb6pA*xn8fwYP&KZRud? z3YQrOF4nZA1LfrC;_>!&aI(D}yw`>fUR~x?KxY&jc4lZ*l zA>ncxdI;JHpSHJy&)VC;S8eD3J3$EwSKH7-&`$Wey&W{Rw}aocp@Y|!mn?G@7L{HP z4Bxk*lc4?ZPwnmC-`d;3f3&58{E{4VA6&Aqw)CJx$4i!ocK6Vw-92<~Ll1|St#XE( zHD9vyYC}iroVaA^)9xOk+ug%mZRp|k70aA%5*xDfmfx5jv2Exncz+CRcMtLH?qPTv zdN{nQiQSRdhK_=F$LMzVkkalRQrpnO;T2+c6s%NchrJCQ1@DgZcK6_DcMmh$(8H^% zg3hikS!TDPli*!3uiZT?Xm<~b+t9Up+t34c#ErA8 zOO~QGbP>EGO55GTns)cFz70LRS{(A`@U@|n;9XJC?jE+ZyNB&<=;2^-(3_*O4P6B9 zh+XaOVNbhz*w=<0up`trM|B&z2;LC~+ug&VcK2|kH9fTLdy_*&rNzz_I8VKCPF5q< z+PbfJE?G`C(X3fnyd8sG7BpCY6Kf6oCBP-i`6ik*J6Q8;09@Dvz1uE}PFf`*2T!t3 z+ug%w?e5{L*7VS}<5pu%GAFL=vhxa--k5Ax#oAjt*&3T@*6i_X$DnDl{rRB3Z({8& zm~{WvM6+fG{dU6A;=+|hxP)A~Jg1=4nVYvJC)-Ja^~cQWV{h^=ck!+6^$M>1yrl&Z zC;rS%O)gp4$%N&uHO}08SE+M#VevBAIr-=soxbG#e3*05tT8Tp6al$_#RlAAQvndMqp;>w@;ZbDIU-YQpV&h^#b7#};HP@J;@-zo>K=2@)A zVtGBGWNA^cGplfAL22e|2_DvoUkk#-@9G!0~_{}gw?K9Il0A!*h#dl zcJtfsRXN2ad4&a4=2!FL3-plx_0I@)<=G@nwD?vl9my*BaY!%F4=5MX-ufTI4MruYY^(pg@Mti!L)ZJ9awC&ifqN z2H#y}cWtsTU+rCGj-4AEj!llTTWarO8Li)b~?&C)pmhz-iC~w+J+fB zwauuX;i#J7aQJ3qMASMoZDS8sUTY=$JV%v7bNC#aGd4LkXgjUvt(&YF2w+%OQ)N&* z>a(q<28|-_J~em*amT5_iNvZ?gDQx7PX*ZXr|8{vTi`Z+`~z$oU+x;{DwlWiK3~_a z0G8!c^p`uKde-By0za7R^y1NPw=RkTjjFLOjk`+R--mPd?qR1Hax^}BcX`Fivy zk4T0sN!oh#^7(rAF88rh*srx12YO(q@p}KMiHA>44p>jqz6cE7tnbt}>pM2GhPaA` zYdy5GRA1`m)E(Mp`;Nhb&X?I8_A>isdnL+_R7YxA>fngl)WNA6Qv(s$K5SEJ>P8}s zk4j{s(R)8l1!b;$SZrTXvBU^2t^J=<6R{WEyQes-##^{ls(wa(iQ$Ku-5 zvtFO|vNvF@_XYyqfX{l-+Yo8@T#KCKX_zXG*|%rTbo=(qC%0?&OsU#{{;}VO#GgIa z1IHyU{``d=)hL@;-4LvIP$cGCIriHW%w0!al>1s#Pxsd-TWB8k?B#a!vbi0$KJGxI zqpC{;wgL6n2aGw`f1F~y9=+O`S$%=^>wU4k;Op7b?W56K?HFSkN;=9k6bH%-l$j{A zP-dgdMVW^(A7uf`B9z4_k9n!zE^5EMddYq>;a`H1jgpJvMtK}1AEf}L2&EXM6lE34 z8kBV?>rp(s->z!EHk<6%2Y)$A1_;lzk}sdB3-) z{r2f2`>lrm0Lnp>S5Xe397Z{U@&-x`%2AZ#C~u;iM0p40J(N?t-=_9&bNh$vO>H09 zn;IY4|2z91?CVw=9lNpiWz032g8|GznjbU|1DJy}pJ@J9V*WRri*$b-8R14echmgZ z5Q*3sa8xnGdIxNg<%rd^Tqc$S5uMyVJLX)cE}WvDaouTM<#lKHoTv3s`!X!U)&sGu zM=z}N&NJiOcb*&P#(N`VTxa4!>-ffwbqB_#xP7r$SIPv=a*TOH-I*ra+R^{DGxzeo zx6pRoo9eER@4;$}=Ngu=FXGIx(K6x!-{wH-Bsc4iZAblb)Y*9U+#sf^uAMoPHv;`{W<3X2^z1pX0q@x|+Gq56QSPYP5IJMhj52&?jmW6V*n&duZ@fP% z$&dGSpr@GofnH*+25djzb2iW?n$wSZJ=o|goe_h2f?dEUuq)ULyantHb^~o-cQ6nc zS4I2R7Z>LX#P#?2Tw+^jd&{vcIPOEfK6frJ5&OZ6KeQb*Zgad2`kce~GZHc8N-^&0 z#2HnNx*_fvgX7(S!NbHj{O$`Yy6+-rjTMZD z7wlvg?BWo-B~!4wTd*g!$EQIrY7g3|J=llZgFm45U^KM{@1XYJUDO`zNA0m2VyQjY zpW1^1sXaKD+Jo`b9vnvP!Qs>%Oeng(9e?`rPR0^r8RuxbU`nQ7Dg*DrDIEF1bn=4^ z@`E$UAH&#e@`Llp4=x}-xR_aaznQUu*>*v9reHq#(N7Wi!BX;rYse3-CqMe}ksque zKe&bb;C4Fa>yP6e%ij7V(PF)Wj&~0nFIpbmwfrt~9?)`1@AA9I-&6Wi`v&{%al#)Z z{pmx4{f;Ez?%!k#`sZZ?`xnd+ew*|!UUr+H8Q_2Yy6K3(|feL((-j9~wk zIl@ox3-WJYc*AeLKYT7Ruc`${`vp_33Z{}D$Jb7NFrECMgZ$u3^5gid z%n^Q?SL9#3@P>ajmT6uMpxB5QN#negMT{qVcGon%9SHV}mfgM6_;&P>{`|hd{-QYH zzfJl}hX(uCBnkfyq<{VRV82gKi=p*YWC(7VBe;EGS`5wmfiiyIj`p;FX@ie{bo()xrkCJ`nyv;2_g~oAlpqVS`~G z4F3>ti0O}${s9&i4|_cPL&2e@e~|PKv9MvV4}kf-Y&h)0;U58xF#Ths zf1HISz@7mANN}X-*QI}og(bqC2!9foWcsH{|8xsWhCLa6NJ&>lnf^J_|Dc79hJ7^r zW56+{{~_sr*uuubJ{JCQ;5gIolKw0Uezn7{q`*HO9B=w_q<^V}O@Ms@{1d^6rhkR> zmsnUT?5XhI3*Kw`S4#hC3!4P{B={$Tli_c`^;koh1*Z<I^egjVHte(Ep99V@{mML;3;SI79|RvX{mML; z2m3tu@l%~E51D>t9?XY*KKu`Z51W2v9xQ-;0sITWg{EJb2a8}|1pg!8Bc@-O2a91} z4F99xqo!Y(2am!282nDq2|vZeNqR(Wq-A66V8jcW7mfyv&kG;44~h?EAjJnmxAFL3 zOu2){hjQuP08)G~rrLOXFs2Lt7U_Qqr1)TD*m!&}?i2p4(!UL)_+Z>`=A z{~3_tgE7a(_@K;IyvmnI>;~^W555~j7zeoCC04Y8g3vE0;7>@}5Ug>`kr1)Sw zYUA<2a0-95^#2f~_+YqfJU$p%!hb;eUjZpT7&$f`AB?5Ke@Oa&3{reB@@zal7|VqJ zu=KwUQhYF$+jx913WdK$`hN;id@xqncziHQg#W1Y9|I{q7%Oc&J{YTo|D^Q44N`nC z*4lV{FrEaZ1!+S?id{E~B#RsFz#^ZyrLG+`}1BwsEMjMY0#%AGH z=K;kB<0%`D55_j(SLXr62jgiQj}OL9;aBGY#RuaV8;=jhv%;^=1BwsEb2c6yjOT@4 zod*;jj2CP?J{T_wzd8>nJ{T|AcziHk7JhXeP<$|cXyft0ct!Ztc|h^O_>qmr2jey2 zSLXr62jj;!9v_U?g3gK@&f* z@T>EH;)C(FjmHP$UEx>f0mTR7XEq)mjM_VReE3%T!uxyn$(7hCZ+0g3pDsA?KEc5c z2*y7sIP77;;g1L=I0X~41e2Evj$S4>woovoL~z1t!PF-NCwT?!8wArf3#M-qoVHWY z@vPvC=LKiJC^+k7!P&0}&V5aA-s^(%ew*V><^?f#4u;5I7hd z3=RQ@fbn2FI20TT4g-gQ_kj0+!@=R;2yg_L049JV!I5AhmXe7n}<|2tEkT1LuJcfe(T6!TI3B;KSeoZ~?dwTnH`#7lDs}kARE8#o(jhqu^uU zW1tgsg0Z^YwG!d%9KBu3q(tOS=A{>wNQibD-|vb)a4F{;lJ z{=g%H^clP^bPax8M|!|ueWquKK8xF-Z}2{17^|T^z~j^hc#`@6-;390duaU7K7z)i zYGyuT^kT~~t_hw1ZpU1%kIOr$Z~qtEvY*B5X*G!xzj z#($am06(QZz|W`;@T(E}B3>7IO3;{8oi0IN%Oiz-&#FMONa~rgmm{EGJXSDA2jL{$GHs~)gWA%K`IK9A=q8D)+G?Ja*mZ(e-j}gB}x;rdN2T=$kz0 z`W9}3CKEGN-|CsBZ}&{scW@hYnHYy&>B-QmJTvrN+y-qX=01J5XQsZ#bHBcq+n~?H z%+mLH9?&V1PRN<}-Hryue>q#yQ>eOFg& zKgP~}iGPXs0+5*O>%uwOcMIp(`T7xFcTaC?KXaa%dWgCFu>J&@Gxuvz?(=0E8s3-zO(Mf!1WKRDFd&o0Jfw~H~Q`Nf#h{bEeh9?{?A zbq^&4kBKxEb4-i%lf2H6@#s&C$zc~`n&B5?n&}r~n)Rsu4zF9Ii!qTF^O*jg$Ely< z_Tw31Kiy4I3dRJ^rTqlXqx}TV|K`e-ZgKjj8tvDUbArc3+KoA`{=fKzHUNd{y|+;8 zFSK7k`wLt|`wLu5`wM(*AnFc6sYHE0j^kjw9)c2&G8APP$~`FLuj`t|_Z`}0(!K+i z(7pq+Y2Sgl!%=sHtbe|DTE7Im9*L5Ol7y0sG75$IyVw`~Vc(&hoAw>}IPE(ypY|PC zFdB8o$oiM#()x|X>v1S4DC1EkpiD%e{yrU=)-M&W??st}G8u)=IbxZ+K6Z+gQJv3{ z()z_=x&L%4uDjgz1MjoqmK*N+!4Ft*o#n2Nf6&SXfx{lQ;=0ORKl~BV?yK=>{f1yU z!D+?yl)FAL%gTm=$xE%cesb53US?(YfMW}-xL$JCr<90xSM{`hBd|PSwH4P%?)uax ztZXDW$!o>Xn8#=RrT4e?qo3G+p+ytZB-16QNjfBFNY0d;B{^GiuH-z)`H~AH7fCLb ze2n-jvH!l#NbC1ooU47K>)pPYGNs#Z>0Ip>f6e!8@KMwCJP694-1dF;9zrlek=q zK1p1mMZZH_sYSm>T&qQ&B0i}_`-x>*^l9QoExL~QlooxK__P*%p7@Lw{UPx=E&3wy z1ugnx;!9feW#SLD=z8LhwCGQXKh~muN&G7<`ZMBBwdgO1C$#9Vh;M7re?$D47R_}B z=#(^{f44BLUu*TsOkAgALdS4{E>q`-lzJER$|bNbfj_vH z>;>=_f`z7E(W#1HFM@vsxWe>LmFtIISqytI{3T$C=~r~BQrJu3UkR=>{fbVt3ieg- zuLf6}enqER1N$2I*Me(Je~w%~^vZRxuY>;y@Cnne=v3=rUl0G2;FG3b(WyMJd*Jti zUih2SD}C_$P!Dg`D`jRskvHZ+Iqc=|ZvZ!#eq|n1z+M6WMsTC)SLVSc*f+ty8Qg68 zm3goQ_AT%~1wLi^2i=$lTVdY{|2A-&=~w2#cG$PW|1|it=~w2#4%m0VzZ2YP`jvT5 z340~{&w$UE{;4@IMbeZ~B#aum|=% z@V@}QVES`z%!9qK?}h(G@I}+F%!7Tf?}Ps(@Fml)%!B=~?}z_o@MZX0s#j8cFy6QE z_+We>bPiRgqWEB(vGMp|oD+Ukr=s{^T(I%@V0=l@xk~~_*I>X;)C(mHXa|0UkksgQ&D^{ z3>%LR#x>zrbt;Mv#&2vqJ{W&1{Hjhx@xl1k#^ZzWcfzmgR1_bK-`jY6FuoIhRi~o( zVEn!!msL76d#O#w(H3r|_%ufZ~G@BjST`xA3d;fZ~G@C*p%K zK={>pK=HvCB;tcHMEKQtK=HvCD&m82kMOJWfZ~HOLc|ASr0}crfZ~IZB;tcHO8C`z zK=HvCBjSTGPWaV%K=HvCFXDqSQTWw)K=Hx2SHuTn@|~9T^16<`@2$qUMWSBbldhrY zJ6}`B%=|#;dP~j;W`Fc%XR13o19j8+HCQE^##!Z?$+^oni*q;WpSeF5?cAS;b}4hp zd(yRw`F$;Ye{3#O{l|YL>gRtc>ZdNmdLH8a?O_jd?)5#&xzFe1+)wQael6M+U3;^0 z4aR5JPxEb|@1MzE{I|ki`gek>z7t&YkD`8h#_67Ru|3sxu{{U;VtWqy#rC{P{jB>} z(Qf^}i*}AVr-QbKzRy;-#}g3secD@mTr(G9Jz{$f*~Ru8_KWQ~;uqWV2Cb*O^IKd$ zuITnwXFs+F-|I(_eN&XMZ@E=)>+OQu?-bl|w_xP}(GK7LN6~S@wN(urC-5j8C-68O zC-6<`w`z!Jx9c9!4tWB6eY-~r`<_vPd&ddxn<%({GKgyk8Y6NEqUczm&y#elz<20a zf$!0=0#7li3tvY4Og$~i)Q7+Q@=~O!50fuDBQ1)K2io}Qcz~zrcz|_uJixQ0BU4@X zoU|y?lQW?!W$(Wtj&crY?pM0R`-E~2a#1H2b(VrlK{x0I^T0fi>hH=!eW76{l6HA#6WZqEsQ)wA6bj_Aa@J4XB={ug z0X?7>^n%pxcpm02)`$JqY>t@^^?j&c2A0WoCs$ab%CTII;X<*~4< zdbvwK?OCGNdB{$=dp>B;m?zEZ~A=pXXB zW*#8v<+=Jr&rI28NS-sq?U*>fwS5O_3s+Z^K^}G({5Bi`%V;h^*%OBT2 z<#o;6Lek5Z>A&>k>z{EuQ*sf;v^5yy$ye{P*s$)|1@&f%UUWalLsXuIEvwC@< z{x_aP93#@pi}b6W6?%iGSpS;aC|9vW|Hf0QH+oj;zvVW{S6rq4&a+zod(RsEdv2qg z#kKlBc-HCv7jC1x#r68Xd7jk&kH@3`2e(n~qF4Wu$EP!InQnz{_0}hpzgVtE zcsJ;sycK#EZlfH=jruL#O?r3lX1yo3Q6A$Ky_feX-R9k@_u)3mW!$F!z`I?K_CBrO z!EKb!xI@3oyHoGyt<+<=jdB{F(ffO=^nu=I^}$|MFSlb1s$RZJkM};O5A%{8dO3C- zKZj{nFW;>X=XIf%2Yr7*d8y{{d0tQ8b)c8WgwxCS=!v{8^m6{43hCu9=*ix_`e<&4 zULF%pFMm-V%j-ff4;quIm+#Y4cpd2FG2!&`m-Gp|F7$FfCeq9I>#5$C^-0_gy*!5Y z)7@e8a@YHxYP4UWmj{hY)ytPaBhEshdeF=H{zAXa>gCy}n}aeN^+_+!#p|UgZj?Nf z$5ED{kRN(E-*@P*S-m_Tb(hQf(92^A@VXGC2xSFIF-i#v^#{G2?>qF@tX^JrkFRq5hzk$E?TeCs8~oUKBbX%Y?p~`GJ*D9q8pTE-WuOXN4~4 zuFw9+%Cf-RpIccr=>EjYa=^!bWo5Zw{+FU1^zxXcST6XrmAS#9YgU#A7XPgk`kuSK z^zW=}8Mx{@E6WGh{G(_Gy*y?)me>8Ol@);N|J}+8K~KPnq218SeZLTTdAVeTTW{CcDscOq8e0wSxLaduz{&v{TMJeV(V!E$ z>v!Fwu_wUYBQ+$vyX*Ii(%6&W-fN{xH=~{T%CQfnKiW8P}kl{e5U{*cMYSzggZf)la|B zGQC{QPo{Q`E5RCIs9vt-FH`-QpSMgeSM!|xXgB+Ppyp?+S`!~_cFSJ50zasSVaQWAlF~j=SF?I~Xs$Q4+w+Iu_ll~QhvtG){e6EF>$#a;9-1>wbF$hH?-kO^!{v}u z{e$14zR<;+)yqS3&8gj?KLo#5RJ}Yj7oF-K`6Jd7MlTP|S*LkeV_2hFo4Zc!j(;0o zFAvRer*7R3$~(_QKa_J$@t<ObaBc%r!i5^IrT%@I_06O`Q_oXa5c|d9h0i9tNG>ObaBc%r!i5^IgN?5 zb;?6m^UK3&;cA|_Iwn&2Ho^|D4uGIp$<d zy7bNT@80rJ*2aT=Ak#2Umi~XZZ6MU-5ynEZ!W() zT%LGn{yEKE$}bO>hu&O%Ijt||CVAqa`RBAg$}bO>hyItEUw$#N>HP9=dO78t$70`7 z&N;<#(#t6iUCl2Kr1vCdxUdF_B(QdFX0>c{sgX%`;cWr0V4z`QE)D%uI87A)63O7b9GFrULKTR z9!4*xymJ~8&pF4KNH3>6bkfV!{PJ*md1#)wIyO}=56v$Rr2ZbKtJiYe{{$Fqi10r zblg9>_fe#`y@(Gsks{sh8jE z{?k;S^2=MImp7GXPS-Te>E%u3my;G7PA^yU&PfBLJaf7>H1+bE-T#{EQ+|0%^zx?i z%+>7))ytd8FK>xnuI8WnF+R#KC;hrL_uZy;lwaNwyXBwhs#56QGR(Cy*xDUoaR2yGsoN_y*wWGq`29=#Hl{zmxt5K zo69p-=a#CMH=AD`MlTP|JEytN^USe5q?adSdv10wbE;4I<>B=5=JL$d?NRme=JLzK z>E)sM=QMXIzdRM&(VBaxQ#;Bp52u%h=ATnL$}dmH`dV|(b$Z|Y|DIoNVIsdgHQ?}h z@dcX$&&w_2&&G|Yy<(}TExmpZcOKs><-MP)0%I0)RQ-5qucWmx{ z7B{<(MQiV0(c1e~{BO8lMN{{wXzBeaI_^t>`z}BC-*X>ldIlw(OH|^wqr_eKT_|x6 z&cliO@cT{rY_7)dG5KdY&VTu5HO^~^NAP<{KUjm`HxiHIe3Ez)=ZD1iaK3jL^y6HP zScmg8;(45t5ijC=3y&K5IjL#)kTA4fUBC>a#S|XK1L;&M=>kLBC6)-y6~I ziOlCy(C>oi_dfJ{9`pGS=I?UM-`kkKr!jvg)7`^YA>;NO8Mhb8xUI{$jl9bm#M?u2WZYgTkG1l6xa9H3lgDF)JRWQ1@o>rGktdJG3VA%%rbWefuk-ri@m$X7b+}CR)duR$ zbo*1l`#?ifo%b5sSnt5I8?UiV^$q7V)^P4jUISiF;kJ$S4RaWqa`vNTcs6}r{D;p! z8UNuqJZ{2Q+i>BG0qQs~uHS91fqOuA5s6II4WOo_?A6hdBK2=S9LtEcd@Z zsDVXZ`L+ihJT>L4*TK>*H(D%=*4;SuvX>Lud7~xH@*VVwYm>Kh9#x69w{&+m;u(9@ zcy36eu0_-HQX1v+QX1v+QX1v+QX1v+QW_r;&r4~P&r4~<^HQpD|G4TY8v7x5U^?z2 z2OhjnW3Pa(K0vn&WS#>j?A}*Sq);zsQTOnP+1}Sr z%=I2RX7~R1Sep0nv2^e2$EJB%uk}|Wt0R5HPKP3W^qiJP`J9$U`J9$U`J9$U`J9$U z`J9%c4CS5By1;3 zH1;<5&T5Ul1HSi!#@+=_c}3lKP850HJyGmEc`VcW_OT`2caCLy-#wP=wbQt3B9XU; zai5Iz(eq&%4I{h0o_C9!ev&K$? zb=x%d0eE(&7|ZDsKJN!7%Dv}dJO8Z4E`T3Cudxroi!W;IBk<#wMcofiRC+%;QRO{< z%;&vutlazIu?p`;$2NI=G;V*SkJ#yaq>r9U(bru&bFL7qM3Y;Y?i;4=1OHm_hnQLiY!BY5hoY^@` zRxWks733B=a|?@|uKavwVTrS-xG*;_Kc{45NpaT5{QOnRN5JxUPF5*jh|NkFmza{2 zotvCAK5KMxc2@GZtT9W*W~YqJN*t3sW=Yn#?8KzpamnL`ULW?Ky|8dgXLfmXu&cYY z?+dZtF6|NT1lR8`?G-bE?IWZ;`}JUZinK>O8El^-?FaFtq53BlJG0SF)yf#235~zb zEE^BsQ2+3(68v>$yYLVh^^YH9*y|3eR)%|S(qCuhKN@VuGjO=Q`>A02x3p;O!lFM8 zw&U3n%%*i_cxYD8-@91qjah0JJBry&Z$d#~X%0)sURzMIb~#H}TCg&~?J99I`Je2i z#Vnz?Fxyq?VhN==Yf5=-*Ydn9m~--r3o(_lvoVoaLRR7Oyt$w^a^OzW7b=F6n1N#mt8DLKWoCXStIS`*U})6EKr>83Sl ztUkuHP8nx5OPV$Uw*G$IBil*Nj zze(CWir%K*jH4#aI4Zm0+hD#X4nd*qGTUE&AM__LV|M=k#=p?zk#JFaGtOUse+>|A z`fL-t{+}u&)PhI?d1oJum9E9Usk`STcT=GfKtmutk5 cx&5ZkT(7z8#*3!f+kYx*{DWChGPM1F0t1*d`2YX_ diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_causal_group.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_causal_group.co deleted file mode 100755 index 018f398ff816064540576e859e3a3e6748030e60..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 46640 zcmeI53wTsTn)j=_6E1QIv^oYgm~clSBq2b62z^3;4sr`10-~KJw~h%(NWxX@?u2j= z5Mo3`M9d%}gRGMgP`nIjG29d!5}g@uyTb{N6K7{u=iAxcnQ!--{{C-O9jNX}Xp-#i z^X)gu^HhF)>aD6%r%v_T=luKaQ=X|arkyqFVCc}c7r z{`Y1*Sr=qeU7Ilf6@9WXgWY5~$~N{XvjX59AT|xMK4P2dp$m)+6x(ccAS<_tZG&n( z>g$%*g?-D%fh-oAY>jd7a(`yz-%LHDzHxl>eqEMD$9J>)Gj^MfdbGc`{?Wepcv6Sy zFNlfvh-`z#*mp&C6l~F;HY;`d+y@z}c-^@?%T?-(&Cf0=%q}Q>diyJ}&XU~HuUO%x z*JGWTWhZuRm$6r3OY_!ccY7^%m9t=Fwj)2UFzfF`QOB~CIXT%S8%33ui=A0UrEV=$ zI=pPHgEwKcRJOJ_`{lgR3Bg_b5$|GAVZquP_AuSpS)Uem7P5ztEmZo??%@MQ52IVy zS;!s^YoXGAb`KXCJ&bE%XCZr-)Iz13?%_>N#FuBzpK5f{*24ZmcGBKLrB*vBr}ClF zOlN_!#Mtj#(L|Urap^4(stNVou%LtW7j>}y(stB;wKRK$qaeG`7=w&jb+G=L4%T1aj`{}{Ts!=W-geYikN;w2M;qAE z(FX9T*ycHKzJKma+R;Y9RH*N01Lr&1z{Pena4_GY zxP;5?Xdz$_KJ91&pLMi>uiDW720?KOSK84+z#x3x(FU43+Q9GH(ZH+AOYTHe}^3zdk*p+tE_scns=r3&T3x!iaXX zaAZ{r!x7hxh60BpvBNDSb-0D(cC>J0g&2;)mCEd}wWFcH;YjUp3-%7TFtZ&k99k7{ zb#=)!w;hcH4#k2Fx3H+gEi7$E3kO#zS6G)!8SQ8ya3Hce+=8paEfln)1q{UXtE@|= z;&wCXSYqhh+*8sS&33#_%7LBw?L=K!}pLV!~ z&pO<~S8Zvbedn##m}Cyz*=6Mw=3bv{SH#}iI@y|AsMhNF>p-Vzvi)_Zzi(mhH!OLjcABzpzETMpRSGuWHS z^jd6bZgGhtvuI^uS;nierE8ZjD=PSWbX#{Ho1ca6;I6fVgZy#qYUir#oRT687#+9G z_$GW+c1dYoQDL?5HNE&Uy@mf&z-qy_>PB3GERz8CMI?bu#x>YvK9x7pos@^T1Dlzd zDX-^{dw!F5ao-CTyrx8X^k;mvFShW%tq0!b3K*$tI}3W@4eX6i#!Up3;+vE4f3l?~ ztHi+=aS|<+`|r#%`AwVre(W*4i;wTu+(jIMrHe0`GQzs};)q?k_-ugJWcJMkZQaej z43OPo_7wv@&|in3K`+^RVSi)?YiRq{F1}jO3;JlA_D8&}i|-2MgWrQ&nK^o^IV{>A z7B<9>^Wis}1sUVq#JWV+;kB7noU*VDr@~m}DNFb*ZjWz!-C%#3*Mlb0HrsdF%g=l5 zVU^yy%kSP~V&1x0{#krH=lJJvru)-5=lkb#S}GS*=iJVG3sCtUTqJvd+nRkHrY3X?KGb^Z!)LB zN#b}~N`w9OayouHjz7#ERyS)vW!>OmXoGD%HFz{}_o*Qxi91dWi6d5@8eB=-d&4e7n29yWHN%w|n_}$?a$Oc3*#Ax%~p)?&t5vw~hXF?OE^fc0FI- zG`${O^Va$6&-D0<-!rEvqTX|rZEUpHTi8{$sj=ytg*BZ!lh=gTW}dgPu_*#QI{Q(6 zdHsdFt{*=4MAr|`EvGh`E}Sv_`rJpR^7?Z;pU!U+wk_bV!)LpRUUu@7m%V+;KXO7f z28{XRCUTC&c$kN$Rb$M(_HNz0-tOHi#$s&Di8AlD+q~Y}Z?7@;5QKQ-~l$;p26 zX&M9nkj>goZL_vxBWsGOY`WSjtUTG9yg7MC*k;>~A%oAC+w8V-+h$u8(vD<%a(VKQ zu)5?S$s3dXVK^=vOLFo?B8>&5@EUvdMphqFS%0S2OYF+f@7aru-=D#o2e$9ozQ*sL z#w+&IQ~k|nJzdQ8p8hxoSEru!c+HnResiP8@Avq<=8K-DaGU#T_#}7JRB^7pJ#(hV zw`V@NBkaB@)f>=0#(U_nv*&u@e8dbpf1y_m(q>ja6#E?*`!b{~ zq#Ps{(ql*kNQFqnNF_*RNUM<6Agx1MkL2d9Ha?W!((zG#OZ}t#|H}4_i;iCDqAsl|xZ?9(fY=Lieg;(ISfws%Vwm+(Y%= zv@@~x44?C~KdN7jZRGWW59`$j`@HMSIM-e0#<}p`22y2K~8;ZHz^X(Kp(LU*N~=PoCsr z190ppUx6|k&z>7B_QU%>`0AN+dE)(iX6V&3XYxj({mrcRkjmb@2Q}e6TTWv}pA!}K z>J8yDHq9u<=hw)z>a;CL^!~>Cvx@9^U;BHDx$o~I=BnTN13qW{w?%ULP_7qCZ0HIf z)En#uMu6SHKHx22U$6&g1$%=2@R({EUvEr|*B>*$>vf7_q2sN-Li*woU zO>o&qB*Iq>tfuqeA2^WvD*wPiUY3f92frF~&2L$f$Zy%;x5WD?aJxGT+`c7fyGPJ= zPi1>m#CG`itw_K7!i?s-37Vq?!-ffVu?cpw3*M3;*wZE0o9g4!pbymttyCYpjp~Cx zp!#4W)d%mS`rzGEAM8){F$~dE9~?mS!9i3X976TMVN@Tym+FHfs6H55eC;^?^5tEO z#YHpDM4MnzhF~%S@5Ut~*}+t@gLblmGszys*j%!M3&;*GB0IR0nR&Yz(SlhvL05)g z0ol<`G18R-9nq@;3n9#ei+_tV#ogSK6LPxIsGhIVMXpkJW9I7ZlMo|C<7c%Xev zys*=}C;R&Gfp)JZ?DRe$du3XneM`Eq)BA$#+ZSKA8}ARVQ_QOx!9<^6(iOpEvg7>P z$PT8G9ki1joJn?^-??N57myuXM0RkgH&e{38o?}|pzDfY0ol<`G11q6T6$EjCn=dNqx;x#=N5KJ)x*lAvoeL-5FeNnow)4U@4(#6;9bFoeHY9RSW_(W z7pFwgydNa}_Z_HD<4gUb{ze9E_Y2y-Q`w$4eth{PTUcFR^Ts;cr1RxN245&o^QUd{ zpefv6lsCz1?^O2w5dyKRXG_hgG9|rqyaJXR~EbT*0>|W&G z3;TWGeTF?=+DDt%2;`4|eIz*2u#b`UaV8dv{8-pWfujt&Chb#9EDrf`u*ZY(hJC8E zPd71WQVj{PL#k>RZP?SLeZGk$B0mxKG2j@({-Cr!WMX5HKNj|J;5ftXl=e&$ekH^j zl3*VXjyLSt(w=K#6Ocav_KDy`!@fe=OHC{p`N^=~58iLsS4#V86PtwmNw7}_C&S)^ z`?02S6D}PB=Yb7&8_H>*X0)TsgB0Yaz&-_>V%U{=kc#|N*r$S14ZAWArXhbC?9;*N zhFzHlcI4Y(PXp5oyD|@EAb$qz4}cFCc4Z#m1G!-)?6bgGhFzHlvynd=_Br4j!>-JO zxyYXjdpekI*p+!O5Bc+8pAXJA?8-cVR@1NmcKkG`;X%W$%!7rXIs zbwl7UXkOTxEPP&gp?Q#hC0AM}TXeXq2?0Fr;uAF*=(pgV-UM%sS}l7G;hR_-74OkqDD?JtAm zAM|W1_YZomupgH8AA#f_^gJu~4|=|^ACdOgK=Kdzax3=_dXcc#O8ZYh@(=n7EB6n2 zsj$Bx?Z-g!5Bf?g_YeANVLvJDZ-e9?^tD#*AN0qC{T*q47bO3nKVjwmLHFDl;2+d^ zK>k55w{riWZxHRM^ML$=zR}A4gT7hV)pk60+RFWd{*17z^ML$={;ZYz2mLu=SLXrw2mN^~_Ye9D!miE(@(=opR_-74mxNuN z2jn00A6mJ8&|emIbsms^&|k4~|DeAr?CLxq|DgZK%Kd}>ny{<$fc%60cUJBn^dAem zIuFP{=s&S?|DYcec6A<*f6z}@xqr~#5_WYSkbltMwsQZVzbowOJRtv||J2I;gI;$h z_YdD%zVQ5>ebNv;<;||d0n-HsJs>z_j^MEQg7-coIO1W!SchO-reH#@U}C=D*doEC zQo#wU1(P2aoa7O-Z4gY^ESS1YaN15m`!j+wo)et;g5d0z1n0giIPX=#1+NJ%{ITGo zV}eWG5?uPO;G=b**gTW!-5O63s6dVQ)1BZjd!F$1b!TZ4bz!BgGa3nYq zj0I!CQQ#;r4vYii!FVtMOaMoNqrpTl5gY@K0mp)4!ExX?FbPZo$Aja+3E%{9A~+FD z29v@2!TZ5U;3RM|I2p8oHc$gKFa=Bjr+`zyR4^5s3Qh&5fz!b0;B?Rq+QBq14V(ea z03QG!0B3?T!CBxea5gv_oCD4Q=Yn&=bTA#92hIcMgY&@!-~#YL@Ii1PxDb2@d;Wi@_z}67XU0VQ?w96nq4H1bh^H6m)%PO+5-<#d*ESe4?H?po59OM*WmYcqz4SqX1a%Jvw1%B4cv_O z=N}wy?r#%)ve`tRQhcIMsXo!CX%A~}^0J5H1N%f8i_xd0+DTsK=yKF#on zKF#!rKFxkadxw{;)kL32i+NOg&+X7o@%-ayVw~$-<^rF zORmPGQc>@UmePMDwkNDM<37pNnEbezjRGfm%;*|(d(FS_{MPoyC$^tkB8e%Isglzq z?UFMjXG+eNoGUp`a)IPR$wiV&B$rA)O8li5zpv9$`u`T!YTsyDk8h?->G4~-R{Qzi z^RaDCPwD?p4GorWEFXK~(0|(oGm8gPHk)z(=4wpcW=1HIt8v;+Gb25YKKl}}ZGXnh z#(*=PGvhwZ)j0D7GaCoae#wmcGFRi=m(6TEIPX=|0~fqzW)s1MKQ`k7-qpD1n3>%V zE_ut0`z%-E(s#{lGWcknnH{kG4HS1ggA5`Scg`~~sv zERmlPe`1OJf_TCb`4#bPOXNQge`<;3x&w4dn$N#moYKFodSwRgQ!=1qI6=u|n#&w)J`%r)$aPUS+r3-&xP&#(`c`-fin z81f&3Js-?B?21lRfcyg35j@hc+^{P;RUz^VVJ`xU47;LJ6(heG_7&g?!#-8+A9`g8 z@=IVZ1xpRPqEnS2zYO-3;7Y@;=v1qazY6x%;A+FJ=u~TvzXtZT;9A3;E%y(-avk#5 z!Tvb-xM5dxs`bcU5Bn3~6NX*UsocnS!|nk+u(zgHdSUmX9Nw%AqEXt!Tv1xtYKH?!EWU5hW$D4Im52ZgFVRK1N-yf^M*b9`aIZ+{JpTh0KQ<@m3goa z`TJmh5q#0GEAwDK^7q6368IAAH>+2Yf6(8za{r)zAao8@ry~ELpRscPpq~?VRi`5V zpkJ_Z|Db;)?5a*h{z1QF<^DncnXs!m75N9f!OH!E{)w=wIu-c`{pVKhAM{@eyQ))> zf6za-a{r)zDeS6FMgBqmdn@-3`mcmt)v3ro=(?5r2mPwBt2!0=2mRMp?jQ8u2)n9N zk$=#?wQ~QU|D&+0Iu-c`{r6VxAN22pUDc_`Kj?q3a{r+Jv#_f=75NAKU##3e=>I0{ zs!m1zLI1Or`v?8sg$kBgn!U2;(IN19*}>~!-ap)y9&EH z56D00-GzV9dkDKa56D00y@Y?zBZOU@2jn00zQRA~w+g#D56D00w+sKE-y!VkJRtv| z_Y?j>zf0KFc|iU_j}rbtzem{Bc|iU_j}iVsA1LhVJRtv|4;KDGA1dtXJRtv|4;TJH zzfaiJc|iU_A1VBUK1$ftc|iU_j~D(yA1&;jcu!;;V0VtwsNA+G&0)^!+p0OMWBlW&bF+>N~+T z|18R6US3y6UTGFCywW!PaMx7YG>WQiF)h*UDUIupAI-4`aWAd9=BhV_gdcK z{hGNL`w_=;*d~tWh)*2PQJ*-T*J(c$UEkvRab=IUy83WD_+CGP@;60@{4KW%ZoNZr z`(1)N?h&jSDC*(+{|GuyxVNgM^8~&@=LtMc=Lvk1+N~Zc>g~Eu)I*#A-{0<0B7e_l z!M)=I_e~VsKN-Y51oaWI1QB$u(B?@xSKvEzuE6)`T!E*U)P*mje1?`1Vd%r(et9X} z(1*#Em6j4g=L2&Mc{iyjm21na;s2oHMm-qKe`tCLF^XBtJRpWH7LIZ z<=291L7IoPPIJUMY_G%i4X1o#B#2Hl_s^ng_Fcpm02_J{FnHO9<~@?MlL z2g_x>lPk;-71*x8_6BeRSP52w8^Mj>CU6tD8Qcug{@ztHEmU8Soi! z7q|<27JL?@dgoW9MC``)ZfrjXJ_qgr_khoX&x3oxz2FPr3n0bT?8EDQc>N;yBDf#i z555Gx1ZHSHx2l!X^(yV};@XsmtiFMIc{K7>z1*ptb}!TF-IPzUdtPYJm?y33<(b-9 zUY6qe4DBpfFVE7>^D-3cN9|!Aw5pe9YajBmMjRmN1mY6IuCRlVG$ zUFKyeR#5Ges+Z?!jl2xS4|<_N;}~1j%OBG|IIdRp z^5xnWye!2Zs(n)R@svq|ge*{t>E zc@)REMeE~vQnPxtYPaz`ie=oU{lK$bi}XCD-O2MPo^gkEw`Zr;-&3VU^E`@ad|Dge zsn!O0p3#PQRK47WKB#*6E^V0SS?yjAb->HyZ{=Amp*{db;eCXv-q4e?>w6VM_^zwi{se1W7Es2+b zULF-nFMm;+z{^4}=Y1l*e7~0Lc}bhZ^P!hV(Ky``LN9l||EYz>6?%CLBo7i@k7Yn#&G^8~s0{S-C?~d;oijrhbTwvuWM-LQ&dceU6Q*c_ek!Q+$Xu8 z2)&%f26}l^OL{rkpqED(dU-MW@3mOi3b3N91^S|^v9gDSm4cfh5Pu}~@-l31xz)l} zf?MyfuvOspyDV%qxZ@rRTLV@Nw6L{c^-v3RLRaIi`z-8naQ7$+0^VJXdq!K>6X4!) z76e?l8uv}KFb}wYvgL1HFQ@YYy*#R|dU-VNlRM*kJL7w8mV=!r%hmW~s%LKq)BuC^ay5RL%Fq1# z=Jawk&e?~0bKe*Gbz9<{sosJMH>a1YanV$7QG?ju&BjMlou!|*Q7?c027390Ht6LI zLN5;$e|;H!EciUMULG8${a1RqF5V}kmxqesuE9QceT(wYk)d_9s+R}HdQ-hUe+Yc9 zsCs#DEI5_l_h+%68|mf2G2=8RYjp8mA-z0Q3^|oQ_$|r{UA$GjJUG^z>K*<=;Cn^Y z%Y$Rlsr=DDV?QDE^5B?tnuoQzIijtx>s0Ufx1sg&;23wRck&PBh_=SMQ@!{899l0A zj)}jFdOkfRqOGy0C=N*AZN^9-~@G3Vs}DHfgL(AD_zP+GVeXKrYc!P>eSUmi*qr?_+K6UCfUJEW~s z9J(4`9!d*WJd_r$#+j>qQnmHY_;RV2 zhtS0-?%a#>!DG&GKIl0P6o;J!DB(;Ojfo#N2d z`0`L%xEg1!_DR*&1LDg==;9Q2PJQAr=jan@>lB9`9A8djbt4@-IL=({o2st|$CuNX z-AE5N;?JwlM~XKOjzbTQFQ@UlksfZupHq7jZyp?n-Wgvmbo9>n^3M43&iHaX&jHWF z=!`G#j4!8o&>3Ie8DHKRU)~vCPQO3tj4%H$iZ2hLfva)nHMoZ7apt&hp_p`vQ&!{9 zX}hKP@|)4WTZ%KMdz#kb(p!ozr}*Ulh!MO|yto>7UW?gApZjn;6kksLzEK=H*;|S)r}KpUwHhl< z_0;%t+8@Q3Q+{i4=qxC{oX!#U;cG31oZ`<(N27RidQOQNhpxt#(>Xvpt;UQ~{5iEl z@#Uf7&~GZfJcRxo9CuFd6&`1fxkWmAa2$GT@#UmT-$?&%EzVq>TdL09YJ7PJ{X01B zoZc%u&K$=>I(u*&dTa6Jq4e+8;>^|KQFZp#;>$zDi3i7@)7+)_@=$T;t;Ltq{-SOW zCmtMsPWz+y@=$T;f2;B37sFeQFAt@cQ`~tp#+G8v$&Zs>PI2gJe0eCnT#YlQdDqgj zht>GE&vix!NaHFYk;mzd5~};?8Y2A3Ww9=Y#ZeibGf9%R}kqYMi-xK2*J2jV}+S zms8w1^@(E6X^xOyPI2gJe0eCnT#YkV`=sjS0rBM_^m2+jr#|tRbM%Sya*9JIyTX8bjFtpouEm14o58Xlg{TycRoLQHs(R+^P@YTA5HP|>T@{AKXg7ny7T$b zhT2Yq%^zsB8&yAjCPUR`SJd|GETAaChJgQ#aT6}pZy*xPnoaQdYmnY*m+VTu_sz>qV zq4e_L_;ad9@#U%5Ut6BJPVbxl-{Z?oOvINb`|Vy2zF?#EY0LRKxnXq;rpmgqYtL}! z{=G^*`?*>^`+1jq_VaG3FYb}ga^5SS<-AWm%Xz=l3v1+anh(h5G#`}DX+9*iz{B#n z%SYsMmygQlF2Bx%wpS~ksr-g~rt)$5OyxJFW_MCP8~GjiY~=Ukvyo3p?ae2jb$nVr z>$qM%>-enH($33g7k?<9U3^hKyZB?{nKblW0BQWB;Tz9{AuXOXcGA#|XR?qMP8v69 z*y^)U)Muio&q7h3fucV9M1AIo`m7W687JzqP1I+asLwJ{pJ8G=r-bxj(tSzSHJ%ef z`Yh?Lq^law=`i$BL-#avO+&A2O{X;U$JTVm)}Ci^qvu$(_4yTTeQw47gy&VX^qh*D zeLh9!b1CrLKc^J1wC`D2IEqN-zXC4nvh7>dPMq-U@b_$`6JRb z7Ll%k|&ep^Mqol@82P}ky6*Wggs-cZ-vP}kZ} z*Vs_k)=<~fP}kB>*U(Vc&M?-;px-6Y?~Um9M8^6Q^t&MXy$}7K$53p zznw1q_G0O`HR-nzcUcR6dpKSC?Zwh>YtnD0Nxz*g{q|z%w>9av5g*9?cDnT2i1X|G z9OW^Q9|wGnI_3GuljmcFJRfW2`Ebhfktff`3VA-(%Jbor=Oa&^j}`KKtd-}(DbGiq zJRd9M`BF01k2Yee&R^(7YuxW-^+QM3pXo*G+lPM3@+ z*IwVl73Q~BdvQPg67vnU``*tBhmBa_dw*~f3vc+g7Z#qHa@J#KDVLi~CPw>io_g8C z3GKYu6l3}hdd1brTe^;}Lfu<>x|*@ZUJcd_Y1S-}v|dWHTrZ_ru9wm**Gp-Z>!mb5 zDAr49mg}W7W4)9bJU^~xiiQ0UJTM*4kpmAtU|}zVhvv{D1DX5437hBT6Dgj<$U8jW z!hQrEdC0&2oL1X1P90vs@phS*{P$EZ2u= zmiHCSa($R)tPkVEK7AW3?0xX`W(zwF)^D@055TiKMPE*z@OnNtQQFea*r? z0e|sh(U-;(HJ(pS9PoUGyw8qV*yrFEZ&}zE;8*Wj*q7ix)QPfRoT&ADdEyPvXUA$h zpC3Em`Qq3?&zHvzd3@CG%NS?s_h;eWC~ud#E^K3+olU#^-3J!-9et1VtAQrgr})a1 z@49$}UoOXWG-F-pAO4bJ1bYauNXc-?dTGB!Zez^~{`W6`@fXnxue67WY<5s?%gt+; znOB&fT{0nBo0S?pb7pFE{P@_o*tqCnqY8=&bF)hv#m+KUp>uh*qqN9T=5m%fN{W}4 zIx?Mwj?$II#YH7$D3O`(%*`vzg+1GmmA!0bt|PB7r^u00RN`4ZPn1nIQ zGRI}b#pjGm7&rV{xBu#eiRW}>iCCmt{p%_7vB*=v-yJf4{i%R#TI(2pU0K;Xf%zk4 z{vT%q<|oPgPhShnpCa?io(RmxcZm4w%5Iwzn2$A~dHz91U_RC=;rX9pQ4aNwpJcQX zwnNQjc;+VkVH{ow%*PryJU{MOVE(_*rnwv2@^N53)|OyaOILiCEH<}rWvt6t>SFRg<(HMP*pi|wXPJ}5mSwLgd0 zT~Jblsg#w4iNsrZ5@ZF=W!VLexVWU$>o4Qv%Y=l~X~xU= z)N2LCOfz1_PaQo*7Kl$6Z{)^}i!*ZLQWC}+1>&X|x$$GC#2dL&uI)G>E^Z2QESWvV znOBxwGJ0&za+lMQvpUP+%8DPKuqHks$+0Xaek|TJD@&aPj@*)>mBmxrTp>0;i#bvk z&(>z8iMeGAeEUf67o1&YHmVyhBops341$IqF>9;jawZWkK>+w6AV*+wfx% z*#1|)zsRU>_$z~hWO>7G)R)<;A6`(~hTZUQ2BT#`!*2B7ARQigal>x-Q-g*-mCf*D zFkTafBGGXf^{>4j29T988~=a(U-0%Qn5e$t@2|bj28ueB+XS!ur^3n~xihLu$3TC^ z`*#bzRiHnt5yby!jOfo8zdmA<)&%EyM*Tm@_y3<{{cb_~3)*C}P@nSRWP1jT`ad=@ zB#rt87b4#nJGJOxS#ggnh)YUFbH(VtF@GLKp3!%;zU?Ps%Q$|+X6)D4cHu=!^=oDQ LzZeB2gX{la=)84o diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_group.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd192x128_bf16_group.co deleted file mode 100755 index bebe7e9b54368c2e50171176151b1439acc9da47..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 40952 zcmeI533yaRy7$lNPFQ3KG&%+~n6L(gkgx_s=o12VkR>c4D4H}`=s-w95>~Oh6T%`Q z#E6K9m_bBDMl%A6%8(YrqJl%B&bV-ASR5zLrt{6cvwU;E>F@tPRR>Nt37zHM`#f_8 zo~QVAy>Hcf>eQ(|sd~?Qsy*W;O|Yn{Jxme*tjc*sm7fW_{JeaJ_%Jn`vP7jF{_m`G zRN6qyx^|(yiau$~U^`ikV&j-n6aa5eku|7v5qZ`_6BMPF$SXqvS-C{y4Vvq*t(DuP z{%{}*L{>>}D?iBNS&(f?J)^#He$#(doXPWR>hX*|pN4uoUUUEPSbBf5!TcA*-nR>} zL8I?^x)IPtgWBZeds6RHl+xFni!xnB&bWnHg?U-IMNe*gDb86qzv!HjxAwI-XGZZ7 zXKtD^BO|M*XkTHLGc#>jVNP)tG?|%&kfjwBI5V>L%+JlA=gfU6F3-6r>xa`{j?2%^ zF3KuaUWzNqS((-D)wre3+$C9Q3v==^eZHgO=nIBCXsqmlLoju*0#js_|<+DJK+_Y`F~bDf38 zaZ`nfFk|5ID-g$f)*GscR+C~>o0F*{Z%cfziZ~@(?9KPL4EW2pDt^yf%UC5fWL~( z9s|1^SGu6Q1@!~EU`uNaY-_E79W5}hE9puXRJNdgKo{(8t%1F*HL$-W2Ienz8aIN| zRV^`KnjD=z*jfXJTWjD*3k>X8m}b5op7yoCM!-<0X{~`%tu=7E1qOC6Of%gQ&bGip zKqq|KS_7ZA*1)+I7(geOZVKmHU?HFrzHY67`qmowT?-7nyr^hlT7E(CitD zFcjDwLtAYjvDFqvx4^=_#iBd%mY7C|y#yCb>P796d%FsTI=_ACvUU7b-=TVN!x zD`vLZ!t7RCnA-viyO)|~SZCDq7MKX^h|E@7aJAY(ZVN1+Bd*M{&Zq?~FcH`h#jUom zyww&~wZOuj!k{mQw*^K5yP~Ys7S^}g!p0U@*j*U#6P94|ie8~Gn|Fj3D40Ga9aN*#Q*^6AxwCrV>X|BwKk%N~f3{Fg&mz^*? zO|Lip-ME6noTbjaB@bRL__8NCuOQ%UF-Mbh~aH~`f)97 z#3@+Y_@XHYYvYS2+qCi70k3NDrGobM7GFB3++guB_QMByK|5FudcjK2hw@dFcd+<; zpcgz#Id~pa+FE?y19p&uzze%wK%dRxvvZ(y#PL8om`*)dKt1TCz7vi|Jy^+nw>G{i z&QlpeJztB-~$ zWk;>yH@H2%jn%i6+_qL#yw#Kall8uv>Yu7}ntz(k8U7hMtz|PSvTss+GjaG|OYZF? z^505^DKp>NBX#|LAap#&tovT_&c1(-w~yaZ;z51KI>+WPM_BdWZY!(q6YHpOC>xIU z8A5J7+IJwi>1f}0vf^l;GIIM-zw*pceuHlE-=w$4uRN~j+xy$g{ANAh$=^xlpVISP z{at1LIX&Oq-(An6_s}KY>b4zgJl?jaN-o}0gQj__{WZrs{KM~=dNHELb4giK=cutN zmz1@27f)K1izkogT*POKE?ZM~F#;_*@zKJPnh$c?e)!CzZ9hD@h&C>Ma9sV>$&b{M znv=Sm{d*C4EBLGMDQ?oM96st*-ahIdII6;nKJmxjrE@s?!_q&c!ja;0OqZ27A|t)t?%hjApzr#{$#NY#dcB=Gm5x9^_Dhhm zE?vCdu3byL%2D)ZHI9$gD#!47=h3_N9Ukkq9Ah8&`>xYAYwNU4Ym|#IWfw1X>Z=?< z*`FNkVL9PxW2y1jZ@caE!At<=PzKl;Uy(;)Wi`k9gyXzR9gd1|4xD#QgzHj7 zXV=$A>p4C;cX2to*jx_VjV^zOLE4M74{1NrYe-c{Zy+5+dK2j| z(mO~;kdEqh8|vT2`iJs{&X4kj+DG|+r~SU(4%O#wSyTN2#u~? z2II55=6I)zC&OJ|hljavFLiNztqymrb@&|>3huKG$ihnr!c$MrGK$cbPyzU+`0Efd}mA8XS|P> zIx1F&Ph2~(BxQ5Tz?6!V^+^1F(REO5K10%rpU>EQPuq)UBw1FK#e|Ss<`_~&2us(Pj>w`U5AKehm`e0Ai2Ya(V*q8OeSk?#cV0~}^>w|Fxm(SxL zU)`!G@zILTp?1N?gAVG!Nz_LvN-Fi>OzOee)Pr*si*7ePS}@Zt z=t>vNr5<()s0WLw2bWV1uA&}xywrnb)Pw7(2RCvq)Dzb|T6yb}c-3+XuXiV0FXo4~ zFTKqe2h1mSExnEU&Qd?Rd!XJPBlHncpWHuC??@2(E>b^fWS~A(6Z)=FKQkpzKYNq^M?2VIKDT?Iz92^EInJps?jNXM zo*?ub_tdW%8L0PaLeK93^<^o6`t{RcP3*3^A@M1v7nuuJeMq)Wc2z^_dT>4U;N@{;$N8DZ74t*eTcV6{#e8B{OO!FLm>=CeP;ZYBdX6jVlluqi9SK6u zaYg;4k%9VDP3SqUsGpe=ou+U6b_lxKZkJy9eqEVub!CsW0vys9&BS^godLRU-rSUM(q#$0vtUeIi63_0!=mHJYtUk&0QmVeKr+-lG+RR3;i~!e-<1-`5c=% zkbF?+w@dwVU>xNS+0;Q~n$TBD{SU!-%AGbffy@y4T~hxdIGFM*n+ms0?S=V5zgOyC z0f$naV^fEb3x$54)V~T2r+kr39YN*`eU;Sz7)+#mu}vLG776_uQhxv(Mfnn&dKbA& z=nqT%+u&%*SJ>3M$w!3#9jSj897Fk|HgzoNxkcqTxLBfIH4g05m)KN|TrKRF$3YV1 zYi#N`a-Gnd$3ZgXkJ;4mXpf-_$gob|fk>;r-iyd^mIUBQQ|LF+jW?yhd8 z-yb^X`NLOV-AsSO@2z#7=r6zM5PkYghi|mgo@nKx+Gwq6T?`ll_5^!^y}({zZ?HGm z2kZm(1^a^izObT1&#(sgLi{>gJZxk;8<`hXb0_} z25Mjum;{ak$AQUUGB_R_4^99lfcJp+fDX_BrhqBnL~tT_FL*CF37iB@1}B44z$xHV za4MJzrh?PJY2b8lIyeKI0nP+xg7<;D<)3c7pwEGD5AZ5?zcps;n3B6khgDm; zl~sqAhE;ppmsKxU#QKhN@N~>ojJaQ1%&lUyBzI3O*-bgt7Ezp|q7TYBM+LWVjtXw$ z92MNrOPipV#k%A2e3A1R&fAUoV{gr&m%&;@w}-iDCGCN`X%E~>d*J>)+C;r9)+zct zjr04y+9Y>BZHg|(dPTPpr6^Uj0Uo3c@GxzFM`E>9H`|Z3OhB8=YvBIcbiEALH&L7? z<6L~4CxbPdCxfRrPXGq-&rPR=US%t8^LGR#9U$x7)6H-I`Xa%dozRO47>Q`){44U+v=XAZF5i7w(Byix1y$KJKR&Xo$gev zQkP-P6*W!U<({tXcF)lExL3+?%eAl<=U#4F3(wT{y6@BWxhdb$-qJ%+PJD&`;^Al~ z2YcJ;9O}J6=kQtDe!c9ru9hCgI5pNcM*e>7HNDJ^?y%QYua~HF(>co9UFT@;tvbix z{-c+zj6qq9f3Ab(Xm7Y5&<^VI-Tf^+?4nI}yJ%CAPqZo7C)zaOLG4Yw?B0aHHgU~l zv}vw(STD1GB|J**vZr)fuZ`N0&?Pq%Yv zgf@ZG*-zk1_7gbko7&nAG1{kA_Uqwkf$id2%xG87&pxyELSnfib3}ik{%rOa_yGG0 zoXh?KAMTB^eUQpg-iPbh7oYnf#Uk}bx&!G>B(%w*qz*#l5^ zpe%o?Yf_Imd>(`pkCcEk7-ByOG8qjYZ;|BU*9QMUS&6Ec1CnQjZwqd){Ngy31A9 z`(6uPUw75@ooc~4%T*UU!=m&7@3`NBb(O1bz=NXRxsgdd`XL{eX2E*ORTrOOQTl^} z=UcFTa@7r8Xi@G2hv!?cUUJnX7KwW2wWJ;cksr0pf_0LsZuBD-We_;VLyw-~_F8`C z`HlUJPwbyrBgrJmWXTDV4#|m2j{ z?Hf(&@Xfe!9e%^P+Gqc!_ig>Oq#l2$t+jq*{n!(a_S;umlmswoodxSRS6%Ys7Cd%$ z)lJxJQMkV5-@c*9JD##A!@!BpSg=lW)lGWNqKp8iykNn)%vG2AqD2`APJbEoz?rXF zl)J!LuUoM0a@EZ~U{US{A9%}xb(X7c?zMcHNl@h9Jmw?^(JQ>>AD$jR2o zz2r1&Mb^l}dj-q#mLC59wTw zyk}FL9mxSKD?kuGIS2neX{I$@V*PX?be``~*oki`1-wM5X-MN(V-`mt>T-a`WyE@c&S&r zsrYi%Ua;PJl^?2<^6+lzYO<}+o5w*J2Fh>hran&IEcE7au#xia-P9+@TZP^{4mMF9)lJ<@-Y)d!aZpZqOgHsO zvX{`C$3X?T>d9fJ~`!BXS+C`O1n6pT|RL>yM5w(_R!AC zzleIP{x0e{rX347ANrfk=i~N^@?Ps(db=jg!Ewa-?6r&Y+2<4Iv)?Dq=QSRuwC!8E zKd7w3TWx(fANbuPC|?^PPzHCfxA%P?wpK6v69(IzC+M|}_+2qSD_M$iOHpnaxJ;Jc zzXHd>v(;O%o`@E_>Y43wlwXeWE5H>X$6=Mz60s8bmB>E=J_4=+SAma$kAiN{4SGNi z$a)8JFn)0y^k1XTfW0X1Mfno2MAkdJ*b-5Sd@1s)!PQ_HSO%^E*MMumwct8%9mwMy zSs{*x{%iD^@_LkCkMfU!kAWP+K4((I2IMy&|2X(KxDnh4J^?-fZUQ%fo59T>>(%5W zMU*37j{KA0lVAl{0X_vj1#SVifKP)@gRFOIaZ4a3}Zz_yU-&`LMpd;``#fmB%~10^iZjf&Q;($j$yQr*_Oe zPpffL&S&dh_>?eC8ufo=XeacteC}?ng@gTHnc6A644=i*9>zhV{;w?UL%po=oSy!# zZ0)przV@*$=kt2nz_o4E|K-xo>Sg&X-`pm%|0_qU)64Mrz85|voMWT@uLattdRgPy zKmA_|wV%0jwa;}qV*qFa=hdkHYmxS)UY79z<~EuAUwPU&y$oXpXbfKAU-{>;N6D-w!=U%4$!M$AjUY9YZV1@R7 z+$*(zcR!;2S(h=cV3qb)_oLeXb-T6y)Mbn<@M!;Vdo{&VqFJyGed`m(7nEvYp4D0# zPnp(Emodg*jdp`)t=7@APV2187-z6v>*9G#vw1dXH|jFR8a%H3z_U?{^gN;6qRSX> zut~ekvsvrmDc7QP8DkEf)Ova2sJy z{a;(P0eV^ZzXJS`j2kho&of$_UIzZJs8If|ZCbou7XB~YM@s+Kv)W+Kc5SFGhyN=o zl>h5FZMa?*{;z;Gnf+fov_!oO{9jR_{9n&&qx7=yf9Y+a|7)i<+Vg@oMwi3?6~%tK zJ%s-l>Ho^c=lMu3q#UFL zNDGmuhyP3OJJ@T~|CNifi)4BDzoPQ+IUlJ2X)#hEQV|mE!T+WA9qcvg|0+h=C9*vH zUr|f(c^T4jq!majksd*!J@~((R^jubNNyw#66a#+@Ry{&Z&6qV{;wz}^7BqwlzCw0 zM;0Xm%>JoG$pl@WSd=Vq!Oty9HkkXBs0aU7)O_UgerZu$V8JDek^>h0+M+A~i+^iT z7J^H^vnaXX@;`}s@P9=uLVo36EJ_}@>hBgMA9VXIXxbJ3m-jQ_|00e6nI{>%HncwONCiVEfbispJ)yB>d& zz<$UO{v)&BV=(1e*W+)J*bi&2$KU*KiNCqZ|Mec$&q4mLhT?K6abFqw0m?SP|7DKf zVZFrKz%_la|H~ZT!}6oQz;T-3|1!q`RiU2!J*=H^E{6Z>T5&-vpZr17{9oqyA=Y!$ z2CnIY{a@zzB9@=@MbrFW<~SrD>ZQIX{87z`M`FF1A2iMXWsX~7z1g+mcukF8Vx75P zG}Hg}-ZlJRA2h@NRV(~oq2i;?qK&yuvjE;CIFB{|b)PV)-3^6UVuh|0_7Ai(|6#g7~h`{}n36i{*EJ zi}J$H)u{g~I2Mfc_Wm*OyJGf#4Tn#}94p51`~QaHgz$d_$CPm#R$Z_}G&i=4^$va; z+W!?CgT{J?|7eM5ZY&z>9r;^m|5tF#+F8`|T}X;(ZtNQC)qI;2(cBm|);smbq=;F# zKFx_`qyMWZ@oZ-Q7ms!NZ%Gj)i2d^ZxwOIfuTXw2#)YNB4r9W2|7WZii( zL!0O;W1LuU{1^M`T7IqII52bD%>J_A_%HU^wftK~d{{Z!$at{eII-aPFZSQH{98tR z80|40EI3Z=di^J^l;+nd|Xidfb*d?&o^^*L!FA9^xkY@2H?^|6=@54?M%aRy>zE zF02aY!#FU`^$fq6IZlk{(@^{uvBu)R z=x4f?KdZ4gF!R_l`^_4S{|e#H3XTiocSVl_!}-u}791zmSo~Kge^z60VCM5N`^_4Q z{|Xhy6&xSNvCH_cP;p|7#eea5QP+s$3XTus@fiOVDo*TQHU7&N)^Pk+DE}AZ!lKc) zj0xj?oc=GyiJ9ZSLixYUabO&G4ei}yj{gef|6*Ji+r*eKwu$~P#)+BZze4%H%yD4m zHktij=J>Br{x8Obu}zE#W1HyzVw{*c{wtLK%Nz%0Zj;&nbv^#8Y5p(9h1qdE^q4SQ z5Bk3tCuWZS3g!PY$AOuzhuQySj{gef|6*Ji+r*eKjuHC57$;_q{|e>*GRJ|L+hq2C z1;l@a@P9EbjBV0m!q6uAzZfS*|Cc%bE0q5$I1bF*HnaaLIQ}b?|I3IEOGO(Q4;CCJ z799T-%Kv4=htVG6!Ghz&uE&1~|JU{Suj}z&!Tv9PXRgP8UEhCc4E;WjUf+Ld3j5*t z>-#UIu^+JiQiJhdP4s^;F02yw7d;LP^AN-Tb*=rISf24;P4Ry<6bHsRN@M=7hT^~I zTM6a=GRK8+P0u(m&h-rc*R}SKVtK}YHO2qcP#l=~e1iR74aI*o#s6iF5A&gYjQ^rP zsyX{su^!{Un&SU5$A_^VW59U4Q2Sh+McH)3e>K_vg?O+={a=XxYO?38UGc^|J7I=nE8Cn{;$U3ze4%Hg5$$Db{YRQ8t2iRJ;+#(@n50* zU%~NVtjGATWE`(KdzSHg^S?d*OI1Yt*J!`P>w&YJzo$^*z@FPBVb!&2S#|N{J<#;~ zd%4^@sY33Zv_J~s9a0?;m0ui6o&tyF~5W1PiV}K(AfK9*LrWPx$lcL_dT(HhWEo7dM~W0?}J@` z4-ESjJ^kOa4-)re=Uk#3?{<(|@ZJWw4fAkv2i~vX-{wlZFQNZU$NX3STa9@wxgYN< z_`oW>e?T6@e3Cqj`5}1(^S!g64|6%P2JZ7hD(h)ZoO9<{N32(PvOU~~5! zZ0>%8{|x&KUhD5@sC@;m?_@OM;`_w5vU-<~G#+jHc7Ta))~#8Fk@e!F*?yl>Bu z_iatyw^QVOdz!p&&yn|S#02U0ZN&TN_w8x&zKyt@>wib}7^jZ|{*F53^~jOeW3jv* zE9CWX%IlFMug79}Jyyu;;gr`SM_!M`@_MY0*TX5VM~=K6i{ajQkM(ghr`;FGi{YLAt-)JTFYp5J&RelKWx(E9>uYmjRw<`O<{SR7|SHaiPMA?0ZQa!I8n(o4j?sHl%_c^VX`<&LxeNO9_h<#4$?UIF#-=aA=4{>-pe7sprE3 zWuA`?to3-=ZeO^UY;!8y%Y9wz<-V@NRu*>u1f!&_34(##x*zU9FXSVzEaBq~iO?4Y(O|?Ur zaQ3_Rt;%zU|`#1Zj{37Of06GmnX9h{jlctpmq zdBZakhiAkO8$4`Y#)!=LgzOQ6NA$nk?5jSgx}>eLZEv8eqm-BL4wT<45g2N@N@Q-DY-6L%-Xs#O37|XDM-+EAsHG(@Na@yd`liXOT;h z|5ILEsKgcKXF7|WN?dW)@?yQTb5TwP#96t8`Nj`{D{&e5i}<753V*@6*f|e$p|l7?sQef|G|BjwkeD>ikWLtK`QxxF(!^w=Y<%)a<72|$VV6IS zNVxn_8#$!uKN;QhlKgS$wB$Kcw8<%A90XpAmgG3T+ag(v%EkxD-nSbHLF0RGNLiQn z5~G@Ne=+Eg1$lqrvD(SJao>qR{?C4YzER(}zZmQ-%Nu&5z7#9n@qxAty>UM>7%dAL zdZYaYd3wC#8hYdYXVAF+Ni+I67@x_0NIWm2zP%@<%2mHtE~JnfA0!8XtZ&?(FaQ2| zFHxWp8&?PXV`0<3Vff6tJO}~KR>;H`VyU~ALq#W@V`ZY1?cX?YVx;!iR z{pE&B@1Hur!X1qH^$GykQr?^ g*N=Tp Date: Wed, 17 Dec 2025 18:20:28 +0800 Subject: [PATCH 4/4] 3 --- hsa/gfx950/fmha_v3_fwd/codegen.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/hsa/gfx950/fmha_v3_fwd/codegen.py b/hsa/gfx950/fmha_v3_fwd/codegen.py index c221410cc4..4b326b0759 100644 --- a/hsa/gfx950/fmha_v3_fwd/codegen.py +++ b/hsa/gfx950/fmha_v3_fwd/codegen.py @@ -31,20 +31,20 @@ // template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd128_fp16"; }; // template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd128_fp16_causal"; }; // template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd128_fp16_causal"; }; -template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192_hd128_bf16"; }; -template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192_hd128_bf16"; }; -template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192_hd128_bf16_causal"; }; -template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192_hd128_bf16_causal"; }; +template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192x128_bf16"; }; +template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192x128_bf16"; }; +template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192x128_bf16_causal"; }; +template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192x128_bf16_causal"; }; // ######################################################| DataType | HDim | MaskType | kIsSEQPad | kIsHDPad | kStoreLSE | GPUArch | BF16Cvt | kIsGroupMode_ | template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd128_bf16_group"; }; template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd128_bf16_group"; }; template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd128_bf16_causal_group"; }; template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd128_bf16_causal_group"; }; -template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192_hd128_bf16_group"; }; -template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192_hd128_bf16_group"; }; -template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192_hd128_bf16_causal_group"; }; -template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192_hd128_bf16_causal_group"; }; +template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192x128_bf16_group"; }; +template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192x128_bf16_group"; }; +template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192x128_bf16_causal_group"; }; +template<> struct FmhaFwdV3Name> { static constexpr const char * fwd_v3_name = "fmha_fwd_hd192x128_bf16_causal_group"; }; // #####################################################| DataType | HDim | MaskType | kIsSEQPad | kIsHDPad | kStoreLSE | GPUArch template<> struct FmhaFwdV3Buf> { static constexpr const void * fwd_v3_buf = fwd_hd128_bf16; };