From 08cd1a906d3b1f29e760b31fd64ebd42bd5f94e3 Mon Sep 17 00:00:00 2001 From: Kiran Varaganti Date: Fri, 31 May 2019 15:14:22 +0530 Subject: [PATCH] Added back BLIS_ENABLE_ZEN_BLOCK_SIZES macro to zen configuration. This is same as release 1.3. This was added before to improve DGEMM Multithreaded scalability on Naples for when number of threads is greater than 16. By mistake this got deleted in many changes done for 2.0 release, now we are adding this change back., in bli_gemm_front.c - code clean Change-Id: I6827b58d2dab1041fe182fef5a007b679ac4bb1f --- config/zen/bli_cntx_init_zen.c | 15 +++++++++++---- config/zen/bli_family_zen.h | 1 + 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/config/zen/bli_cntx_init_zen.c b/config/zen/bli_cntx_init_zen.c index ed7287cee0..615a31a043 100644 --- a/config/zen/bli_cntx_init_zen.c +++ b/config/zen/bli_cntx_init_zen.c @@ -173,15 +173,22 @@ void bli_cntx_init_zen( cntx_t* cntx ) mc = 510, kc = 1024 and nc = 4080 */ +#ifdef BLIS_ENABLE_ZEN_BLOCK_SIZES + // Zen optmized level 3 cache block sizes #if BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES - bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 510, 144, 72 ); - bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 1024, 256, 256 ); - bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 1020, 510, 510, 255 ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], 1024, 1024, 1024, 1024 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 3056 ); #else bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 ); - bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 2040, 1528 ); #endif +#else + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 3056 ); +#endif bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); diff --git a/config/zen/bli_family_zen.h b/config/zen/bli_family_zen.h index c82392b60e..1da4a392f7 100644 --- a/config/zen/bli_family_zen.h +++ b/config/zen/bli_family_zen.h @@ -39,6 +39,7 @@ #define BLIS_THREAD_MAX_IR 1 #define BLIS_THREAD_MAX_JR 1 +#define BLIS_ENABLE_ZEN_BLOCK_SIZES // Vanilla BLIS disables AMD's small matrix handling by default. #if 0