diff --git a/aiter/configs/a4w4_blockscale_tuned_gemm.csv b/aiter/configs/a4w4_blockscale_tuned_gemm.csv index 2c6aecc440..51a05157f0 100644 --- a/aiter/configs/a4w4_blockscale_tuned_gemm.csv +++ b/aiter/configs/a4w4_blockscale_tuned_gemm.csv @@ -915,3 +915,9 @@ cu_num,M,N,K,kernelId,splitK,us,kernelName,tflops,bw,errRatio 256,3000,7168,256,40,0,17.4695,_ZN5aiter41f4gemm_bf16_per1x32Fp4_BpreShuffle_96x512E,630.24,2536.39,0.0 256,8,2112,7168,21,0,12.8435,_ZN5aiter41f4gemm_bf16_per1x32Fp4_BpreShuffle_32x128E,18.86,594.22,0.0 256,1,2112,7168,21,0,12.3647,_ZN5aiter41f4gemm_bf16_per1x32Fp4_BpreShuffle_32x128E,2.45,612.81,0.0 +256,3000,3072,1536,47,0,15.3407,_ZN5aiter42f4gemm_bf16_per1x32Fp4_BpreShuffle_160x256E,1845.52,1505.49,0.0 +256,3000,7168,2048,50,0,37.8602,_ZN5aiter42f4gemm_bf16_per1x32Fp4_BpreShuffle_192x256E,2326.46,1410.98,0.0 +256,3000,512,7168,29,0,16.2854,_ZN5aiter41f4gemm_bf16_per1x32Fp4_BpreShuffle_64x128E,1352.14,961.54,0.0 +256,8,3072,1536,42,0,5.4682,_ZN5aiter42f4gemm_bf16_per1x32Fp4_BpreShuffle_128x128E,13.81,441.57,0.0 +256,8,7168,2048,29,0,5.836,_ZN5aiter41f4gemm_bf16_per1x32Fp4_BpreShuffle_64x128E,40.25,1278.77,0.0 +256,8,512,7168,29,0,9.6677,_ZN5aiter41f4gemm_bf16_per1x32Fp4_BpreShuffle_64x128E,6.07,193.62,0.0 diff --git a/aiter/configs/a4w4_blockscale_untuned_gemm.csv b/aiter/configs/a4w4_blockscale_untuned_gemm.csv index 6be1afc43f..3c91c37b07 100644 --- a/aiter/configs/a4w4_blockscale_untuned_gemm.csv +++ b/aiter/configs/a4w4_blockscale_untuned_gemm.csv @@ -183,7 +183,13 @@ M,N,K 8192, 8192, 1024 16384, 8192, 1024 1, 2112, 7168 +8, 2112, 7168 +8, 3072, 1536 +8, 7168, 2048 +8, 512, 7168 3000, 2112, 7168 -60000, 4096, 512 3000, 7168, 256 -8, 2112, 7168 +3000, 3072, 1536 +3000, 7168, 2048 +3000, 512, 7168 +60000, 4096, 512