diff --git a/.github/scripts/op_tune.sh b/.github/scripts/op_tune.sh index 68e17d1cc4..30f10b8f7b 100755 --- a/.github/scripts/op_tune.sh +++ b/.github/scripts/op_tune.sh @@ -18,11 +18,11 @@ testFailedFiles=() declare -a tune_jobs=( "ck_batched_gemm_a8w8:csrc/ck_batched_gemm_a8w8:op_tests/test_batched_gemm_a8w8.py:python3 csrc/ck_batched_gemm_a8w8/batched_gemm_a8w8_tune.py -i aiter/configs/a8w8_untuned_batched_gemm.csv -o aiter/configs/a8w8_tuned_batched_gemm.csv" "ck_batched_gemm_bf16:csrc/ck_batched_gemm_bf16:op_tests/test_batched_gemm_bf16.py:python3 csrc/ck_batched_gemm_bf16/batched_gemm_bf16_tune.py -i aiter/configs/bf16_untuned_batched_gemm.csv -o aiter/configs/bf16_tuned_batched_gemm.csv" -# "csrc/ck_gemm_a4w4_blockscale:op_tests/test_gemm_a4w4_blockscale.py:python3 csrc/ck_gemm_a4w4_blockscale/gemm_a4w4_blockscale_tune.py -i aiter/configs/a4w4_blockscale_untuned_gemm.csv -o aiter/configs/a4w4_blockscale_tuned_gemm.csv" "ck_gemm_a8w8:csrc/ck_gemm_a8w8:op_tests/test_gemm_a8w8.py:python3 csrc/ck_gemm_a8w8/gemm_a8w8_tune.py -i aiter/configs/a8w8_untuned_gemm.csv -o aiter/configs/a8w8_tuned_gemm.csv" "ck_gemm_a8w8_blockscale:csrc/ck_gemm_a8w8_blockscale:op_tests/test_gemm_a8w8_blockscale.py:python3 csrc/ck_gemm_a8w8_blockscale/gemm_a8w8_blockscale_tune.py -i aiter/configs/a8w8_blockscale_untuned_gemm.csv -o aiter/configs/a8w8_blockscale_tuned_gemm.csv" "ck_gemm_a8w8_blockscale_bpreshuffle:csrc/ck_gemm_a8w8_blockscale_bpreshuffle:op_tests/test_gemm_a8w8_blockscale.py:python3 csrc/ck_gemm_a8w8_blockscale_bpreshuffle/gemm_a8w8_blockscale_bpreshuffle_tune.py -i aiter/configs/a8w8_blockscale_bpreshuffle_untuned_gemm.csv -o aiter/configs/a8w8_blockscale_bpreshuffle_tuned_gemm.csv" "ck_gemm_a8w8_bpreshuffle:csrc/ck_gemm_a8w8_bpreshuffle:op_tests/test_gemm_a8w8.py:python3 csrc/ck_gemm_a8w8_bpreshuffle/gemm_a8w8_bpreshuffle_tune.py -i aiter/configs/a8w8_bpreshuffle_untuned_gemm.csv -o aiter/configs/a8w8_bpreshuffle_tuned_gemm.csv" + 
#"ck_gemm_a4w4_blockscale:csrc/ck_gemm_a4w4_blockscale:op_tests/test_gemm_a4w4_blockscale.py:python3 csrc/ck_gemm_a4w4_blockscale/gemm_a4w4_blockscale_tune.py -i aiter/configs/a4w4_blockscale_untuned_gemm.csv -o aiter/configs/a4w4_blockscale_tuned_gemm.csv" ) for job in "${tune_jobs[@]}"; do diff --git a/.github/workflows/operators-tuning.yaml b/.github/workflows/operators-tuning.yaml index 6563a33b4c..a9a75add45 100644 --- a/.github/workflows/operators-tuning.yaml +++ b/.github/workflows/operators-tuning.yaml @@ -78,7 +78,7 @@ jobs: docker exec \ -w /workspace \ operators_tuning_test \ - ./.github/scripts/op_tune.sh test "ck_batched_gemm_a8w8, ck_gemm_a8w8, ck_gemm_a8w8_blockscale" + ./.github/scripts/op_tune.sh test "${{ github.event.inputs.shapes }}" - name: Operators tuning Tests run: | @@ -87,7 +87,7 @@ jobs: docker exec \ -w /workspace \ operators_tuning_test \ - ./.github/scripts/op_tune.sh tune "ck_batched_gemm_a8w8, ck_gemm_a8w8, ck_gemm_a8w8_blockscale" "${{ github.event.inputs.arguments }}" + ./.github/scripts/op_tune.sh tune "${{ github.event.inputs.shapes }}" "${{ github.event.inputs.arguments }}" - name: Show the difference after tuning run: | @@ -100,7 +100,7 @@ jobs: docker exec \ -w /workspace \ operators_tuning_test \ - ./.github/scripts/op_tune.sh test "ck_batched_gemm_a8w8, ck_gemm_a8w8, ck_gemm_a8w8_blockscale" + ./.github/scripts/op_tune.sh test "${{ github.event.inputs.shapes }}" - name: Upload tuned CSVs uses: actions/upload-artifact@v4 diff --git a/docs/autotuning_pipeline.md b/docs/autotuning_pipeline.md index 45c3416723..2fda6eb9e8 100644 --- a/docs/autotuning_pipeline.md +++ b/docs/autotuning_pipeline.md @@ -14,4 +14,22 @@ Based on this, we provide CI pipelines to generate and use these tuned CSV files - [Manual Pipeline](https://github.com/ROCm/aiter/actions/workflows/operators-tuning.yaml): Allows users to select specific shapes to tune and choose whether to upload the results to the Aiter repository. + 1. 
Navigate to the Autotuning Pipelines GitHub Actions workflow page: https://github.com/ROCm/aiter/actions/workflows/operators-tuning.yaml + + 2. To trigger the workflow, click the `Run workflow` button at the top right corner of the Actions page. By default, this will run the tuning process for all shapes available in the `aiter/configs` directory. If you wish to tune only specific shapes, enter a comma-separated list of shape names in the `List of shape names to run` field, for example: `ck_gemm_a8w8, ck_gemm_a8w8_blockscale, ck_gemm_a8w8_blockscale_bpreshuffle, ck_gemm_a8w8_bpreshuffle`. If additional arguments are needed for the tuning script, you can provide them in the `Additional arguments for the tuning script` field. A full list of supported arguments can be found in the [base_tuner.py script](https://github.com/ROCm/aiter/blob/main/aiter/utility/base_tuner.py#L70). + + ![Aiter Autotuning CI Pipeline - 1](https://raw.githubusercontent.com/ROCm/aiter/main/docs/images/autotuning_ci_pipeline_1.jpeg) + + 3. During the workflow execution, the following steps will be performed: + - Run performance tests before tuning. + - Execute the tuning process for the selected operators. + - Display the differences in the CSV files after tuning. + - Run performance tests again after tuning to compare results. + - Upload the tuned CSV files as GitHub workflow artifacts. + - Once the workflow has finished, you can download the tuned CSV artifacts and upload them to the Aiter repository as needed. + + 4. If you wish to upload your own untuned CSV files, please create a new branch and update the relevant untuned CSV files in the `aiter/configs` directory. Then, trigger the workflow on your branch to proceed with tuning. + + ![Aiter Autotuning CI Pipeline - 2](https://raw.githubusercontent.com/ROCm/aiter/main/docs/images/autotuning_ci_pipeline_2.jpeg) + - Scheduled Pipeline: Runs nightly or weekly to generate all tuned CSV files and automatically upload the results to the Aiter repository. 
diff --git a/docs/images/autotuning_ci_pipeline_1.jpeg b/docs/images/autotuning_ci_pipeline_1.jpeg new file mode 100644 index 0000000000..91f0a9a6a6 Binary files /dev/null and b/docs/images/autotuning_ci_pipeline_1.jpeg differ diff --git a/docs/images/autotuning_ci_pipeline_2.jpeg b/docs/images/autotuning_ci_pipeline_2.jpeg new file mode 100644 index 0000000000..1dd45f2776 Binary files /dev/null and b/docs/images/autotuning_ci_pipeline_2.jpeg differ