Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
f05d85f
add asm bf16_gemm to gemm tuner
yzhou103 Oct 13, 2025
8acb336
mdf_fileName (#1160)
amd-ruitang3 Oct 11, 2025
678d717
Tuning Utility update (#1102)
valarLip Oct 12, 2025
4c8f6bc
[MI35X] Fix meta mha 950 error to make guard return bool func itself …
ZhangLirong-amd Oct 12, 2025
af44cf6
Migrate tests to mi325-1gpu runner due to the slow network issues on …
gyohuangxin Oct 13, 2025
0c8e10e
Temporarily using mi355 runners until issues on mi325 runners are fixe…
gyohuangxin Oct 13, 2025
0bbad4d
Update sglang_downstream.yaml to use aiter-mi325-1gpu runner (#1181)
gyohuangxin Oct 13, 2025
b82f22c
CI: Add Aiter Release Package CI (#1171)
gyohuangxin Oct 13, 2025
6b049c8
Merge branch 'main' into tune_gemm_op_bf16
yzhou103 Oct 14, 2025
cf1d28e
update gemmTuner
yzhou103 Oct 14, 2025
2180a72
update tuned_gemm.py
yzhou103 Oct 14, 2025
8336cb6
fix lint error
yzhou103 Oct 14, 2025
de39b82
update asm maxSplitK and tuned_gemm.csv
yzhou103 Oct 14, 2025
07f5bf2
Add AITER_CONFIG_GEMM_BF16 and profile_file params to save all temp r…
yzhou103 Oct 15, 2025
e4e2420
fix error
yzhou103 Oct 15, 2025
97d8c0d
Merge branch 'main' into tune_gemm_op_bf16
yzhou103 Oct 15, 2025
244a924
add padded_m logic and update gemm_common
yzhou103 Oct 16, 2025
20b7e0c
Merge branch 'main' into tune_gemm_op_bf16
yzhou103 Oct 16, 2025
1a81657
refactor get tuned config in tuned_gemm
yzhou103 Oct 17, 2025
deecc1f
fix lint err
yzhou103 Oct 17, 2025
68f3341
wrap the get_gemm_a16w16_config using torch_compile_guard
yzhou103 Oct 17, 2025
bd0703b
fix lint error
yzhou103 Oct 17, 2025
ed1310d
refactor tuned_gemm.py
yzhou103 Oct 17, 2025
d14ae9c
wrap gemm_a16w16 with torch_compile_guard
yzhou103 Oct 17, 2025
ba4ff41
fix lint error
yzhou103 Oct 17, 2025
27b039b
add gen_fake func
yzhou103 Oct 17, 2025
d752ca0
Merge branch 'main' into tune_gemm_op_bf16
yzhou103 Oct 17, 2025
c5735c5
update
valarLip Oct 17, 2025
57fbebb
update GemmTuner --profile_file
yzhou103 Oct 18, 2025
d2c05d4
Merge branch 'main' into tune_gemm_op_bf16
yzhou103 Oct 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions aiter/configs/tuned_gemm.csv
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
M,N,K,bias,dtype,outdtype,scaleAB,cu_num,libtype,solidx,soltimes,kernelName,tflops,bw
64,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,2,0,_ZN5aiter28bf16gemm_outf32_tn_64x64_pf3E,0,0
80,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,3,0,_ZN5aiter28bf16gemm_outf32_tn_96x64_pf3E,0,0
128,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,2,0,_ZN5aiter28bf16gemm_outf32_tn_64x64_pf3E,0,0
150,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,0,0,_ZN5aiter28bf16gemm_outf32_tn_32x64_pf3E,0,0
192,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,3,0,_ZN5aiter28bf16gemm_outf32_tn_96x64_pf3E,0,0
220,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,0,0,_ZN5aiter28bf16gemm_outf32_tn_32x64_pf3E,0,0
256,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,2,0,_ZN5aiter28bf16gemm_outf32_tn_64x64_pf3E,0,0
384,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,3,0,_ZN5aiter28bf16gemm_outf32_tn_96x64_pf3E,0,0
448,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,0,0,_ZN5aiter28bf16gemm_outf32_tn_32x64_pf3E,0,0
512,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,2,0,_ZN5aiter28bf16gemm_outf32_tn_64x64_pf3E,0,0
M,N,K,bias,dtype,outdtype,scaleAB,cu_num,libtype,solidx,splitK,soltimes,kernelName,err_ratio,tflops,bw
64,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,1,10,8.3,_ZN5aiter28bf16gemm_outf32_tn_32x64_pf3E,0.0,20.21,402.69
80,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,2,10,8.8,_ZN5aiter28bf16gemm_outf32_tn_48x64_pf3E,0.0,23.83,400.29
128,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,3,10,11.2,_ZN5aiter28bf16gemm_outf32_tn_64x64_pf3E,0.0,29.96,362.79
150,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,2,5,11.7,_ZN5aiter28bf16gemm_outf32_tn_48x64_pf3E,0.0,33.61,368.46
192,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,5,10,12.8,_ZN5aiter28bf16gemm_outf32_tn_96x64_pf3E,0.0,39.32,373.76
220,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,3,5,13.1,_ZN5aiter28bf16gemm_outf32_tn_64x64_pf3E,0.0,44.02,389.28
256,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,3,5,14.4,_ZN5aiter28bf16gemm_outf32_tn_64x64_pf3E,0.0,46.6,382.29
384,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,5,5,19.7,_ZN5aiter28bf16gemm_outf32_tn_96x64_pf3E,0.0,51.1,352.63
10 changes: 10 additions & 0 deletions aiter/configs/untuned_gemm.csv
Original file line number Diff line number Diff line change
@@ -1 +1,11 @@
M,N,K,bias,dtype,outdtype,scaleAB
64,256,5120,False,torch.bfloat16,torch.float32,False
80,256,5120,False,torch.bfloat16,torch.float32,False
128,256,5120,False,torch.bfloat16,torch.float32,False
150,256,5120,False,torch.bfloat16,torch.float32,False
192,256,5120,False,torch.bfloat16,torch.float32,False
220,256,5120,False,torch.bfloat16,torch.float32,False
256,256,5120,False,torch.bfloat16,torch.float32,False
384,256,5120,False,torch.bfloat16,torch.float32,False
448,256,5120,False,torch.bfloat16,torch.float32,False
512,256,5120,False,torch.bfloat16,torch.float32,False
8 changes: 8 additions & 0 deletions aiter/jit/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ def mp_lock(
"AITER_CONFIG_BATCHED_GEMM_BF16",
f"{AITER_ROOT_DIR}/aiter/configs/bf16_tuned_batched_gemm.csv",
)

AITER_CONFIG_GEMM_BF16 = os.getenv(
"AITER_CONFIG_GEMM_BF16",
f"{AITER_ROOT_DIR}/aiter/configs/tuned_gemm.csv",
)
## merge config files
##example: AITER_CONFIG_GEMM_A4W4="/path1:/path2"
import pandas as pd
Expand Down Expand Up @@ -156,6 +161,9 @@ def update_config_files(file_path: str, merge_name: str):
AITER_CONFIG_BF16_BATCHED_GEMM_FILE = update_config_files(
AITER_CONFIG_BF16_BATCHED_GEMM, "bf16_tuned_batched_gemm"
)
AITER_CONFIG_GEMM_BF16_FILE = update_config_files(
AITER_CONFIG_GEMM_BF16, "bf16_tuned_gemm"
)
# config_env end here

find_aiter = importlib.util.find_spec("aiter")
Expand Down
4 changes: 2 additions & 2 deletions aiter/ops/gemm_op_a4w4.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ def gemm_a4w4(
splitK = ck_config.get("splitK", None)
kernelName = ck_config["kernelName"]
if (
m < 256
or (ck_config is not None and kernelName.find("_ZN") == -1)
ck_config is not None
and kernelName.find("_ZN") == -1
# or bias is None
):
splitK = 0 if splitK is None else splitK
Expand Down
Loading