diff --git a/config/zen/bli_cntx_init_zen.c b/config/zen/bli_cntx_init_zen.c index ed7287cee0..615a31a043 100644 --- a/config/zen/bli_cntx_init_zen.c +++ b/config/zen/bli_cntx_init_zen.c @@ -173,15 +173,22 @@ void bli_cntx_init_zen( cntx_t* cntx ) mc = 510, kc = 1024 and nc = 4080 */ +#ifdef BLIS_ENABLE_ZEN_BLOCK_SIZES + // Zen optimized level 3 cache block sizes #if BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES - bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 510, 144, 72 ); - bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 1024, 256, 256 ); - bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 1020, 510, 510, 255 ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], 1024, 1024, 1024, 1024 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 3056 ); #else bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 ); - bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 2040, 1528 ); #endif +#else + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 3056 ); +#endif bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); diff --git a/config/zen/bli_family_zen.h b/config/zen/bli_family_zen.h index c82392b60e..1da4a392f7 100644 --- a/config/zen/bli_family_zen.h +++ b/config/zen/bli_family_zen.h @@ -39,6 +39,7 @@ #define BLIS_THREAD_MAX_IR 1 #define BLIS_THREAD_MAX_JR 1 +#define BLIS_ENABLE_ZEN_BLOCK_SIZES // Vanilla BLIS disables AMD's small matrix handling by default. #if 0