From 1a1060d8c659119b6dc22902278509bf06ddeead Mon Sep 17 00:00:00 2001 From: lalala-sh Date: Mon, 22 Dec 2025 07:59:34 +0000 Subject: [PATCH 1/2] Gate fp8 K1=8 fallback on BlockGemmPipelineVersion::v3 in CK MoE stage1/stage2 GEMM --- .../ck_gemm_moe_2stages_codegen/gemm_moe_ck2stages_common.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csrc/ck_gemm_moe_2stages_codegen/gemm_moe_ck2stages_common.cuh b/csrc/ck_gemm_moe_2stages_codegen/gemm_moe_ck2stages_common.cuh index aa7cd67366..80a07dbba7 100644 --- a/csrc/ck_gemm_moe_2stages_codegen/gemm_moe_ck2stages_common.cuh +++ b/csrc/ck_gemm_moe_2stages_codegen/gemm_moe_ck2stages_common.cuh @@ -76,7 +76,7 @@ void ck_moe_stage1_gemm(const hipStream_t& stream, ck::is_same_v ? 1 : NXDLPerWave; // Note: some fp8 instances didn't compile with AK1/BK1=16 static constexpr ck::index_t K1 = - (NPerBlock == 64 && sizeof(A0DataType) == 1 && sizeof(B0DataType) == 1) ? 8 : 16; + (PipelineVer == ck::BlockGemmPipelineVersion::v3 && NPerBlock == 64 && sizeof(A0DataType) == 1 && sizeof(B0DataType) == 1) ? 8 : 16; static constexpr ck::index_t AK1 = K1 / sizeof(A0DataType); static constexpr ck::index_t BK1 = ck::is_same_v ? 32 : K1 / sizeof(B0DataType); static constexpr ck::index_t EVec = 16 / sizeof(EDataType); @@ -262,7 +262,7 @@ void ck_moe_stage2_gemm(const hipStream_t& stream, static constexpr ck::index_t CShuffleMLane = BLOCKSIZE / CShuffleNLane; // Note: some fp8 instances didn't compile with AK1/BK1=16 static constexpr ck::index_t K1 = - (KPerBlock == 64 && sizeof(A0DataType) == 1 && sizeof(B0DataType) == 1) ? 8 : 16; + (PipelineVer == ck::BlockGemmPipelineVersion::v3 && NPerBlock == 64 && sizeof(A0DataType) == 1 && sizeof(B0DataType) == 1) ? 8 : 16; static constexpr ck::index_t AK1 = K1 / sizeof(A0DataType); static constexpr ck::index_t BK1 = ck::is_same_v ? 
32 / sizeof(B0DataType) : K1 / sizeof(B0DataType); From 23e15a8aaa2739c3bd957eea4285acbc9c9f23eb Mon Sep 17 00:00:00 2001 From: lalala-sh Date: Mon, 22 Dec 2025 08:55:26 +0000 Subject: [PATCH 2/2] Restore KPerBlock == 64 check for fp8 K1 selection in ck_moe_stage2_gemm --- csrc/ck_gemm_moe_2stages_codegen/gemm_moe_ck2stages_common.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/ck_gemm_moe_2stages_codegen/gemm_moe_ck2stages_common.cuh b/csrc/ck_gemm_moe_2stages_codegen/gemm_moe_ck2stages_common.cuh index 80a07dbba7..705460baed 100644 --- a/csrc/ck_gemm_moe_2stages_codegen/gemm_moe_ck2stages_common.cuh +++ b/csrc/ck_gemm_moe_2stages_codegen/gemm_moe_ck2stages_common.cuh @@ -262,7 +262,7 @@ void ck_moe_stage2_gemm(const hipStream_t& stream, static constexpr ck::index_t CShuffleMLane = BLOCKSIZE / CShuffleNLane; // Note: some fp8 instances didn't compile with AK1/BK1=16 static constexpr ck::index_t K1 = - (PipelineVer == ck::BlockGemmPipelineVersion::v3 && NPerBlock == 64 && sizeof(A0DataType) == 1 && sizeof(B0DataType) == 1) ? 8 : 16; + (KPerBlock == 64 && sizeof(A0DataType) == 1 && sizeof(B0DataType) == 1) ? 8 : 16; static constexpr ck::index_t AK1 = K1 / sizeof(A0DataType); static constexpr ck::index_t BK1 = ck::is_same_v ? 32 / sizeof(B0DataType) : K1 / sizeof(B0DataType);