From ac73bb6eb24c75169437ebd96bdf461db2411dca Mon Sep 17 00:00:00 2001 From: Chris Millette Date: Wed, 7 May 2025 16:02:20 +0000 Subject: [PATCH 1/2] Add initial support for gfx1151 --- CMakeLists.txt | 2 +- library/include/rocwmma/internal/config.hpp | 8 ++- .../include/rocwmma/internal/constants.hpp | 3 ++ .../include/rocwmma/internal/wmma_impl.hpp | 4 +- samples/common.hpp | 3 +- test/gemm/gemm_kernel_base_dispatch_impl.hpp | 50 ++++++++++--------- test/gemm/gemm_test_traits.hpp | 3 +- test/hip_device.cpp | 4 ++ test/hip_device.hpp | 1 + .../detail/load_contamination.hpp | 25 +++++----- .../detail/store_contamination.hpp | 25 +++++----- .../detail/fill_fragment.hpp | 25 +++++----- test/unit/layout_test/detail/col_layout.hpp | 25 +++++----- test/unit/layout_test/detail/colnt_layout.hpp | 25 +++++----- test/unit/layout_test/detail/row_layout.hpp | 25 +++++----- test/unit/layout_test/detail/rownt_layout.hpp | 25 +++++----- .../detail/load_store_matrix_sync.hpp | 25 +++++----- .../detail/map_block_to_matrix_override.hpp | 25 +++++----- .../detail/map_matrix_to_data_override.hpp | 25 +++++----- 19 files changed, 179 insertions(+), 149 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 82d10fd94..b29303bb5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,7 +101,7 @@ if (ADDRESS_SANITIZER_ENABLED) TARGETS "gfx90a:xnack+;gfx942:xnack+;gfx950:xnack+" ) else() rocm_check_target_ids(DEFAULT_GPU_TARGETS - TARGETS "gfx908;gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" ) + TARGETS "gfx908;gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201" ) endif() # check if ROCm supports `__hip_fp8_e5m2` and `__hip_fp8_e4m3` diff --git a/library/include/rocwmma/internal/config.hpp b/library/include/rocwmma/internal/config.hpp index 6ab3e9503..e0451bc6a 100644 --- a/library/include/rocwmma/internal/config.hpp +++ b/library/include/rocwmma/internal/config.hpp @@ -37,6 +37,7 @@ /// ROCWMMA_ARCH_GFX1100 /// ROCWMMA_ARCH_GFX1101 /// ROCWMMA_ARCH_GFX1102 +/// ROCWMMA_ARCH_GFX1151 /// ROCWMMA_ARCH_GFX1200 /// ROCWMMA_ARCH_GFX1201 /// @@ -63,6 +64,8 @@ #define ROCWMMA_ARCH_GFX1101 __gfx1101__ #elif defined(__gfx1102__) && ROCWMMA_DEVICE_COMPILE #define ROCWMMA_ARCH_GFX1102 __gfx1102__ +#elif defined(__gfx1151__) && ROCWMMA_DEVICE_COMPILE +#define ROCWMMA_ARCH_GFX1151 __gfx1151__ #elif defined(__gfx1200__) && ROCWMMA_DEVICE_COMPILE #define ROCWMMA_ARCH_GFX1200 __gfx1200__ #elif defined(__gfx1201__) && ROCWMMA_DEVICE_COMPILE @@ -94,6 +97,9 @@ static_assert(0, "Unsupported architecture"); #if !defined(ROCWMMA_ARCH_GFX1102) #define ROCWMMA_ARCH_GFX1102 0 #endif +#if !defined(ROCWMMA_ARCH_GFX1151) +#define ROCWMMA_ARCH_GFX1151 0 +#endif #if !defined(ROCWMMA_ARCH_GFX1200) #define ROCWMMA_ARCH_GFX1200 0 #endif @@ -122,7 +128,7 @@ static_assert(0, "Unsupported architecture"); #define ROCWMMA_BLOCK_DIM_32_SUPPORTED 1 #endif -#if ROCWMMA_ARCH_GFX1100 || ROCWMMA_ARCH_GFX1101 || ROCWMMA_ARCH_GFX1102 +#if ROCWMMA_ARCH_GFX1100 || ROCWMMA_ARCH_GFX1101 || ROCWMMA_ARCH_GFX1102 || ROCWMMA_ARCH_GFX1151 #define ROCWMMA_ARCH_GFX11 1 #define ROCWMMA_WAVE32_MODE 1 #define ROCWMMA_BLOCK_DIM_16_SUPPORTED 1 diff --git a/library/include/rocwmma/internal/constants.hpp b/library/include/rocwmma/internal/constants.hpp index 5968b4169..78156c1be 100644 --- a/library/include/rocwmma/internal/constants.hpp +++ b/library/include/rocwmma/internal/constants.hpp @@ -42,6 +42,7 @@ namespace rocwmma static constexpr uint32_t AMDGCN_ARCH_ID_GFX1100 = 0x1100; static constexpr uint32_t AMDGCN_ARCH_ID_GFX1101 = 0x1101; static constexpr uint32_t AMDGCN_ARCH_ID_GFX1102 = 0x1102; + static constexpr uint32_t AMDGCN_ARCH_ID_GFX1151 = 0x1151; static constexpr uint32_t AMDGCN_ARCH_ID_GFX1200 = 0x1200; static constexpr uint32_t AMDGCN_ARCH_ID_GFX1201 = 0x1201; static constexpr uint32_t AMDGCN_ARCH_ID_NONE = 0x0000; @@ -70,6 +71,8 @@ namespace rocwmma static constexpr uint32_t AMDGCN_CURRENT_ARCH_ID = AMDGCN_ARCH_ID_GFX1101; #elif ROCWMMA_ARCH_GFX1102 static constexpr uint32_t AMDGCN_CURRENT_ARCH_ID = AMDGCN_ARCH_ID_GFX1102; +#elif ROCWMMA_ARCH_GFX1151 + static constexpr uint32_t AMDGCN_CURRENT_ARCH_ID = AMDGCN_ARCH_ID_GFX1151; #elif ROCWMMA_ARCH_GFX1200 static constexpr uint32_t AMDGCN_CURRENT_ARCH_ID = AMDGCN_ARCH_ID_GFX1200; #elif ROCWMMA_ARCH_GFX1201 diff --git a/library/include/rocwmma/internal/wmma_impl.hpp b/library/include/rocwmma/internal/wmma_impl.hpp index 466a91eac..110757b22 100644 --- a/library/include/rocwmma/internal/wmma_impl.hpp +++ b/library/include/rocwmma/internal/wmma_impl.hpp @@ -48,7 +48,8 @@ namespace rocwmma TargetId, Constants::AMDGCN_ARCH_ID_GFX1100, Constants::AMDGCN_ARCH_ID_GFX1101, - Constants::AMDGCN_ARCH_ID_GFX1102> && Cond>; + Constants::AMDGCN_ARCH_ID_GFX1102, + Constants::AMDGCN_ARCH_ID_GFX1151> && Cond>; // Enabler for all of gfx12 template @@ -66,6 +67,7 @@ namespace rocwmma Constants::AMDGCN_ARCH_ID_GFX1100, Constants::AMDGCN_ARCH_ID_GFX1101, Constants::AMDGCN_ARCH_ID_GFX1102, + Constants::AMDGCN_ARCH_ID_GFX1151, Constants::AMDGCN_ARCH_ID_GFX1200, Constants::AMDGCN_ARCH_ID_GFX1201> && Cond>; diff --git a/samples/common.hpp b/samples/common.hpp index 153d7660a..d7eb1e194 100644 --- a/samples/common.hpp +++ b/samples/common.hpp @@ -90,7 +90,8 @@ bool isGfx11() return ((deviceName.find("gfx1100") != std::string::npos) || (deviceName.find("gfx1101") != std::string::npos) - || (deviceName.find("gfx1102") != std::string::npos)); + || (deviceName.find("gfx1102") != std::string::npos) + || (deviceName.find("gfx1151") != std::string::npos)); } bool isGfx12() diff --git a/test/gemm/gemm_kernel_base_dispatch_impl.hpp b/test/gemm/gemm_kernel_base_dispatch_impl.hpp index 20fdad288..6c5fbb891 100644 --- a/test/gemm/gemm_kernel_base_dispatch_impl.hpp +++ b/test/gemm/gemm_kernel_base_dispatch_impl.hpp @@ -80,18 +80,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, SWITCH_BODY_TBLOCK_Y, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_GUARD_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_GUARD_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_GUARD_BODY @@ -156,18 +157,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, SWITCH_BODY_TBLOCK_Y, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_KERNEL_FUNC_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_KERNEL_FUNC_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_KERNEL_FUNC_BODY diff --git a/test/gemm/gemm_test_traits.hpp b/test/gemm/gemm_test_traits.hpp index 874f13438..0df88e1ef 100644 --- a/test/gemm/gemm_test_traits.hpp +++ b/test/gemm/gemm_test_traits.hpp @@ -103,12 +103,13 @@ namespace rocwmma IsGfx1100 = (ArchId == Constants::AMDGCN_ARCH_ID_GFX1100), IsGfx1101 = (ArchId == Constants::AMDGCN_ARCH_ID_GFX1101), IsGfx1102 = (ArchId == Constants::AMDGCN_ARCH_ID_GFX1102), + IsGfx1151 = (ArchId == Constants::AMDGCN_ARCH_ID_GFX1151), IsGfx1200 = (ArchId == Constants::AMDGCN_ARCH_ID_GFX1200), IsGfx1201 = (ArchId == Constants::AMDGCN_ARCH_ID_GFX1201), IsGfx9 = IsGfx908 || IsGfx90A || IsGfx942 || IsGfx950, - IsGfx11 = IsGfx1100 || IsGfx1101 || IsGfx1102, + IsGfx11 = IsGfx1100 || IsGfx1101 || IsGfx1102 || IsGfx1151, IsGfx12 = IsGfx1200 || IsGfx1201, }; diff --git a/test/hip_device.cpp b/test/hip_device.cpp index 145fa9893..bcb61791c 100644 --- a/test/hip_device.cpp +++ b/test/hip_device.cpp @@ -73,6 +73,10 @@ namespace rocwmma { mGcnArch = hipGcnArch_t::GFX1102; } + else if(deviceName.find("gfx1151") != std::string::npos) + { + mGcnArch = hipGcnArch_t::GFX1151; + } else if(deviceName.find("gfx1200") != std::string::npos) { mGcnArch = hipGcnArch_t::GFX1200; diff --git a/test/hip_device.hpp b/test/hip_device.hpp index b15b53871..efca01414 100644 --- a/test/hip_device.hpp +++ b/test/hip_device.hpp @@ -53,6 +53,7 @@ namespace rocwmma GFX1100 = Constants::AMDGCN_ARCH_ID_GFX1100, GFX1101 = Constants::AMDGCN_ARCH_ID_GFX1101, GFX1102 = Constants::AMDGCN_ARCH_ID_GFX1102, + GFX1151 = Constants::AMDGCN_ARCH_ID_GFX1151, GFX1200 = Constants::AMDGCN_ARCH_ID_GFX1200, GFX1201 = Constants::AMDGCN_ARCH_ID_GFX1201, UNSUPPORTED_ARCH = Constants::AMDGCN_ARCH_ID_NONE, diff --git a/test/unit/contamination_test/detail/load_contamination.hpp b/test/unit/contamination_test/detail/load_contamination.hpp index a8fc75151..a1afe3001 100644 --- a/test/unit/contamination_test/detail/load_contamination.hpp +++ b/test/unit/contamination_test/detail/load_contamination.hpp @@ -121,18 +121,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, CASE_IMPL_ASSIGN2, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_GUARD_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_GUARD_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_GUARD_BODY diff --git a/test/unit/contamination_test/detail/store_contamination.hpp b/test/unit/contamination_test/detail/store_contamination.hpp index 7b17c77e4..887e9cac1 100644 --- a/test/unit/contamination_test/detail/store_contamination.hpp +++ b/test/unit/contamination_test/detail/store_contamination.hpp @@ -121,18 +121,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, CASE_IMPL_ASSIGN2, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_GUARD_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_GUARD_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_GUARD_BODY diff --git a/test/unit/fill_fragment_test/detail/fill_fragment.hpp b/test/unit/fill_fragment_test/detail/fill_fragment.hpp index 2c0f407ab..b192845e1 100644 --- a/test/unit/fill_fragment_test/detail/fill_fragment.hpp +++ b/test/unit/fill_fragment_test/detail/fill_fragment.hpp @@ -103,18 +103,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, CASE_IMPL_ASSIGN2, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_GUARD_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_GUARD_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_GUARD_BODY diff --git a/test/unit/layout_test/detail/col_layout.hpp b/test/unit/layout_test/detail/col_layout.hpp index 1395af770..581970629 100644 --- a/test/unit/layout_test/detail/col_layout.hpp +++ b/test/unit/layout_test/detail/col_layout.hpp @@ -105,18 +105,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, CASE_IMPL_ASSIGN2, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_GUARD_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_GUARD_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_GUARD_BODY diff --git a/test/unit/layout_test/detail/colnt_layout.hpp b/test/unit/layout_test/detail/colnt_layout.hpp index a5dae6022..c706b86ff 100644 --- a/test/unit/layout_test/detail/colnt_layout.hpp +++ b/test/unit/layout_test/detail/colnt_layout.hpp @@ -105,18 +105,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, CASE_IMPL_ASSIGN2, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_GUARD_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_GUARD_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_GUARD_BODY diff --git a/test/unit/layout_test/detail/row_layout.hpp b/test/unit/layout_test/detail/row_layout.hpp index 2670dc469..6d403f007 100644 --- a/test/unit/layout_test/detail/row_layout.hpp +++ b/test/unit/layout_test/detail/row_layout.hpp @@ -98,18 +98,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, CASE_IMPL_ASSIGN2, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_GUARD_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_GUARD_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_GUARD_BODY diff --git a/test/unit/layout_test/detail/rownt_layout.hpp b/test/unit/layout_test/detail/rownt_layout.hpp index a378c01cc..45abb8106 100644 --- a/test/unit/layout_test/detail/rownt_layout.hpp +++ b/test/unit/layout_test/detail/rownt_layout.hpp @@ -105,18 +105,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, CASE_IMPL_ASSIGN2, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_GUARD_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_GUARD_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_GUARD_BODY diff --git a/test/unit/load_store_matrix_sync_test/detail/load_store_matrix_sync.hpp b/test/unit/load_store_matrix_sync_test/detail/load_store_matrix_sync.hpp index 7aeae30ea..20417e5f7 100644 --- a/test/unit/load_store_matrix_sync_test/detail/load_store_matrix_sync.hpp +++ b/test/unit/load_store_matrix_sync_test/detail/load_store_matrix_sync.hpp @@ -98,18 +98,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, CASE_IMPL_ASSIGN2, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_GUARD_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_GUARD_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_GUARD_BODY diff --git a/test/unit/map_util_test/detail/map_block_to_matrix_override.hpp b/test/unit/map_util_test/detail/map_block_to_matrix_override.hpp index 9858088bd..988440f07 100644 --- a/test/unit/map_util_test/detail/map_block_to_matrix_override.hpp +++ b/test/unit/map_util_test/detail/map_block_to_matrix_override.hpp @@ -181,18 +181,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, CASE_IMPL_ASSIGN2, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_GUARD_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_GUARD_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_GUARD_BODY diff --git a/test/unit/map_util_test/detail/map_matrix_to_data_override.hpp b/test/unit/map_util_test/detail/map_matrix_to_data_override.hpp index 48a8a4227..e17c0b5bb 100644 --- a/test/unit/map_util_test/detail/map_matrix_to_data_override.hpp +++ b/test/unit/map_util_test/detail/map_matrix_to_data_override.hpp @@ -182,18 +182,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, CASE_IMPL_ASSIGN2, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_GUARD_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_GUARD_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_GUARD_BODY From 6dafe44daf31b515935667d6409eb0e855274dcc Mon Sep 17 00:00:00 2001 From: Chris Millette Date: Fri, 9 May 2025 15:25:16 -0500 Subject: [PATCH 2/2] Add more gfx1151 test cases --- test/dlrm/dlrm_kernel_base_impl.hpp | 3 +- .../detail/cross_lane_ops.hpp | 15 +++---- .../detail/layout_traits.hpp | 25 ++++++------ .../detail/layout_traits_int.hpp | 39 ++++++++++--------- test/unit/unit_test_traits.hpp | 3 +- 5 files changed, 45 insertions(+), 40 deletions(-) diff --git a/test/dlrm/dlrm_kernel_base_impl.hpp b/test/dlrm/dlrm_kernel_base_impl.hpp index 7d0ba907f..ee33fca77 100644 --- a/test/dlrm/dlrm_kernel_base_impl.hpp +++ b/test/dlrm/dlrm_kernel_base_impl.hpp @@ -108,7 +108,8 @@ namespace rocwmma // Arch auto isGfx908 = deviceArch == DeviceInfo::GFX908; auto isGfx11 = (deviceArch == DeviceInfo::GFX1100) || (deviceArch == DeviceInfo::GFX1101) - || (deviceArch == DeviceInfo::GFX1102); + || (deviceArch == DeviceInfo::GFX1102) + || (deviceArch == DeviceInfo::GFX1151); auto isGfx12 = (deviceArch == DeviceInfo::GFX1200) || (deviceArch == DeviceInfo::GFX1201); diff --git a/test/unit/cross_lane_ops_test/detail/cross_lane_ops.hpp b/test/unit/cross_lane_ops_test/detail/cross_lane_ops.hpp index 34e06e975..b0bf32b59 100644 --- a/test/unit/cross_lane_ops_test/detail/cross_lane_ops.hpp +++ b/test/unit/cross_lane_ops_test/detail/cross_lane_ops.hpp @@ -75,7 +75,8 @@ namespace rocwmma // gfx11 doesn't support dpp wave shift / rotate, bcast15x16 or bcast31x32 bool isGfx11 = (deviceArch == Base::DeviceInfo::GFX1100 || deviceArch == Base::DeviceInfo::GFX1101 - || deviceArch == Base::DeviceInfo::GFX1102); + || deviceArch == Base::DeviceInfo::GFX1102 + || deviceArch == Base::DeviceInfo::GFX1151); bool isGfx12 = (deviceArch == Base::DeviceInfo::GFX1200) || (deviceArch == Base::DeviceInfo::GFX1201); @@ -253,12 +254,12 @@ namespace rocwmma // Map GTest params to Kernel params using TestParamsT = std::tuple; using KernelT = DppOpsKernel< - std::tuple_element_t, // DataT - std::tuple_element_t, // CrossLaneOp - std::tuple_element_t::value, // WriteRowMask - std::tuple_element_t::value, // WriteBankMask - std::tuple_element_t::value // BoundCtrl - >; + std::tuple_element_t, // DataT + std::tuple_element_t, // CrossLaneOp + std::tuple_element_t::value, // WriteRowMask + std::tuple_element_t::value, // WriteBankMask + std::tuple_element_t::value // BoundCtrl + >; return std::make_shared(); } diff --git a/test/unit/layout_traits_test/detail/layout_traits.hpp b/test/unit/layout_traits_test/detail/layout_traits.hpp index bd319286c..84110442d 100644 --- a/test/unit/layout_traits_test/detail/layout_traits.hpp +++ b/test/unit/layout_traits_test/detail/layout_traits.hpp @@ -88,18 +88,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, CASE_IMPL_ASSIGN2, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_GUARD_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_GUARD_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_GUARD_BODY diff --git a/test/unit/layout_traits_test/detail/layout_traits_int.hpp b/test/unit/layout_traits_test/detail/layout_traits_int.hpp index 747af1fd3..2e4e1ac4e 100644 --- a/test/unit/layout_traits_test/detail/layout_traits_int.hpp +++ b/test/unit/layout_traits_test/detail/layout_traits_int.hpp @@ -93,18 +93,19 @@ namespace rocwmma ROCWMMA_SWITCH_BODY2_ARG2( \ waveSize, CASE_IMPL_ASSIGN2, HipDevice::Wave32, HipDevice::Wave64, ARCH_ID) -#define DISPATCH_GUARD_BODY \ - ROCWMMA_SWITCH_BODY9_ARG1(deviceArch, \ - SWITCH_BODY_WAVE_SIZE, \ - HipDevice::GFX908, \ - HipDevice::GFX90A, \ - HipDevice::GFX942, \ - HipDevice::GFX950, \ - HipDevice::GFX1100, \ - HipDevice::GFX1101, \ - HipDevice::GFX1102, \ - HipDevice::GFX1200, \ - HipDevice::GFX1201) +#define DISPATCH_GUARD_BODY \ + ROCWMMA_SWITCH_BODY10_ARG1(deviceArch, \ + SWITCH_BODY_WAVE_SIZE, \ + HipDevice::GFX908, \ + HipDevice::GFX90A, \ + HipDevice::GFX942, \ + HipDevice::GFX950, \ + HipDevice::GFX1100, \ + HipDevice::GFX1101, \ + HipDevice::GFX1102, \ + HipDevice::GFX1151, \ + HipDevice::GFX1200, \ + HipDevice::GFX1201) DISPATCH_GUARD_BODY @@ -147,13 +148,13 @@ namespace rocwmma // Map GTest params to Kernel params using TestParamsT = std::tuple; using KernelT = LayoutTraitsIntKernel< - std::tuple_element_t::value, // BlockM - std::tuple_element_t::value, // BlockN - std::tuple_element_t, // DataT - std::tuple_element_t, // DataLayout - std::tuple_element_t::value, // MmaDim - std::tuple_element_t::value // SplitK - >; + std::tuple_element_t::value, // BlockM + std::tuple_element_t::value, // BlockN + std::tuple_element_t, // DataT + std::tuple_element_t, // DataLayout + std::tuple_element_t::value, // MmaDim + std::tuple_element_t::value // SplitK + >; return std::make_shared(); } diff --git a/test/unit/unit_test_traits.hpp b/test/unit/unit_test_traits.hpp index bc63160ef..fce378e40 100644 --- a/test/unit/unit_test_traits.hpp +++ b/test/unit/unit_test_traits.hpp @@ -77,11 +77,12 @@ namespace rocwmma IsGfx1100 = (ArchId == Constants::AMDGCN_ARCH_ID_GFX1100), IsGfx1101 = (ArchId == Constants::AMDGCN_ARCH_ID_GFX1101), IsGfx1102 = (ArchId == Constants::AMDGCN_ARCH_ID_GFX1102), + IsGfx1151 = (ArchId == Constants::AMDGCN_ARCH_ID_GFX1151), IsGfx1200 = (ArchId == Constants::AMDGCN_ARCH_ID_GFX1200), IsGfx1201 = (ArchId == Constants::AMDGCN_ARCH_ID_GFX1201), IsGfx9 = IsGfx908 || IsGfx90A || IsGfx942 || IsGfx950, - IsGfx11 = IsGfx1100 || IsGfx1101 || IsGfx1102, + IsGfx11 = IsGfx1100 || IsGfx1101 || IsGfx1102 || IsGfx1151, IsGfx12 = IsGfx1200 || IsGfx1201, }; };