diff --git a/frame/include/bli_edge_case_macro_defs.h b/frame/include/bli_edge_case_macro_defs.h index 242045a029..4a1fba7ac0 100644 --- a/frame/include/bli_edge_case_macro_defs.h +++ b/frame/include/bli_edge_case_macro_defs.h @@ -38,14 +38,14 @@ // Helper macros for edge-case handling within gemm microkernels. -#define GEMM_UKR_SETUP_CT_PRE(ch,mr,nr,row_major) \ +#define GEMM_UKR_SETUP_CT_PRE(ch,mr,nr,row_major,alignment) \ \ PASTEMAC(ch,ctype)* restrict _beta = beta; \ PASTEMAC(ch,ctype)* restrict _c = c; \ const inc_t _rs_c = rs_c; \ const inc_t _cs_c = cs_c; \ PASTEMAC(ch,ctype) _ct[ BLIS_STACK_BUF_MAX_SIZE / sizeof( PASTEMAC(ch,type) ) ] \ - __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ + __attribute__((aligned(alignment))); \ const inc_t _rs_ct = row_major ? nr : 1; \ const inc_t _cs_ct = row_major ? 1 : mr; @@ -64,27 +64,27 @@ #define GEMM_UKR_SETUP_CT(ch,mr,nr,row_major) \ \ - GEMM_UKR_SETUP_CT_PRE(ch,mr,nr,row_major); \ + GEMM_UKR_SETUP_CT_PRE(ch,mr,nr,row_major,1); \ const bool _use_ct = ( row_major ? cs_c != 1 : rs_c != 1 ) || \ m != mr || n != nr; \ GEMM_UKR_SETUP_CT_POST(ch); #define GEMM_UKR_SETUP_CT_AMBI(ch,mr,nr,row_major) \ \ - GEMM_UKR_SETUP_CT_PRE(ch,mr,nr,row_major); \ + GEMM_UKR_SETUP_CT_PRE(ch,mr,nr,row_major,1); \ const bool _use_ct = ( cs_c != 1 && rs_c != 1 ) || \ m != mr || n != nr; \ GEMM_UKR_SETUP_CT_POST(ch); #define GEMM_UKR_SETUP_CT_ANY(ch,mr,nr,row_major) \ \ - GEMM_UKR_SETUP_CT_PRE(ch,mr,nr,row_major); \ + GEMM_UKR_SETUP_CT_PRE(ch,mr,nr,row_major,1); \ const bool _use_ct = m != mr || n != nr; \ GEMM_UKR_SETUP_CT_POST(ch); #define GEMM_UKR_SETUP_CT_ALIGNED(ch,mr,nr,row_major,alignment) \ \ - GEMM_UKR_SETUP_CT_PRE(ch,mr,nr,row_major); \ + GEMM_UKR_SETUP_CT_PRE(ch,mr,nr,row_major,alignment); \ const bool _use_ct = ( row_major ? cs_c != 1 : rs_c != 1 ) || \ m != mr || n != nr || \ ( (uintptr_t)_c % alignment ) || \ diff --git a/kernels/haswell/3/bli_gemm_haswell_asm_d8x6.c b/kernels/haswell/3/bli_gemm_haswell_asm_d8x6.c index a3a8b0b09f..dd9526d566 100644 --- a/kernels/haswell/3/bli_gemm_haswell_asm_d8x6.c +++ b/kernels/haswell/3/bli_gemm_haswell_asm_d8x6.c @@ -100,7 +100,7 @@ void bli_sgemm_haswell_asm_16x6 uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; - GEMM_UKR_SETUP_CT( s, 16, 6, true ); + GEMM_UKR_SETUP_CT( s, 16, 6, false ); begin_asm()