From 07b19ee56e3051873bcb8679cea4d5e0cc186b33 Mon Sep 17 00:00:00 2001 From: Balint Cristian Date: Tue, 10 Oct 2023 21:54:29 +0300 Subject: [PATCH] [Target][CI] Add LLVM functions for current system info --- python/tvm/target/codegen.py | 42 +++++++++ python/tvm/testing/utils.py | 89 ++++++------------- src/target/llvm/llvm_module.cc | 25 ++++++ tests/python/contrib/test_amx.py | 13 ++- tests/python/contrib/test_gemm_acc32_vnni.py | 4 +- .../python/integration/test_auto_tensorize.py | 12 +-- tests/python/relay/test_op_level1.py | 4 +- tests/python/relay/test_op_level10.py | 4 +- tests/python/relay/test_op_level2.py | 4 +- .../python/target/test_llvm_features_info.py | 6 ++ .../test_meta_schedule_cpu_dot_product.py | 8 +- 11 files changed, 128 insertions(+), 83 deletions(-) diff --git a/python/tvm/target/codegen.py b/python/tvm/target/codegen.py index 1a2efd4efaff..b2a92c2ca21b 100644 --- a/python/tvm/target/codegen.py +++ b/python/tvm/target/codegen.py @@ -96,6 +96,48 @@ def llvm_get_intrinsic_name(intrin_id: int) -> str: return _ffi_api.llvm_get_intrinsic_name(intrin_id) +def llvm_get_system_x86_vendor(): + """Get system x86 vendor info. + + Parameters + ---------- + + Returns + ------- + vendor : str + The current system's cpu vendor. + """ + return _ffi_api.llvm_get_system_x86_vendor() + + +def llvm_get_system_triple(): + """Get system host triple. + + Parameters + ---------- + + Returns + ------- + triple : str + The current system's triple. + """ + return _ffi_api.llvm_get_system_triple() + + +def llvm_get_system_cpu(): + """Get system host cpu name. + + Parameters + ---------- + + Returns + ------- + cpu_name : str + The current system's cpu name. + """ + return _ffi_api.llvm_get_system_cpu() + + def llvm_get_targets(): """Get LLVM target list. diff --git a/python/tvm/testing/utils.py b/python/tvm/testing/utils.py index 7817ddcb0189..7c8b0e883c71 100644 --- a/python/tvm/testing/utils.py +++ b/python/tvm/testing/utils.py @@ -77,7 +77,6 @@ def test_something(): import textwrap import time import shutil -import subprocess from pathlib import Path from typing import Optional, Callable, Union, List, Tuple @@ -91,6 +90,7 @@ def test_something(): import tvm.te import tvm._ffi +from tvm.target import codegen from tvm.contrib import nvcc, cudnn, rocm import tvm.contrib.hexagon._ci_env_check as hexagon from tvm.driver.tvmc.frontends import load_model @@ -1002,76 +1002,43 @@ def _corstone300_compile_time_check(): requires_vitis_ai = Feature("vitis_ai", "Vitis AI", cmake_flag="USE_VITIS_AI") -def _arm_dot_supported(): - arch = platform.machine() +# check cpu features +def _has_cpu_feat(features): + cpu = codegen.llvm_get_system_cpu() + triple = codegen.llvm_get_system_triple() + target = "llvm -mtriple=%s -mcpu=%s" % (triple, cpu) + has_feat = codegen.target_has_features(features, tvm.target.Target(target)) - if arch not in ["arm64", "aarch64"]: - return False + return has_feat - if sys.platform.startswith("darwin"): - cpu_info = subprocess.check_output("sysctl -a", shell=True).strip().decode() - for line in cpu_info.split("\n"): - if line.startswith("hw.optional.arm.FEAT_DotProd"): - return bool(int(line.split(":", 1)[1])) - elif sys.platform.startswith("linux"): - return True - return False - - -def _is_intel(): - # Only linux is supported for now. - if sys.platform.startswith("linux"): - with open("/proc/cpuinfo", "r") as content: - return "Intel" in content.read() - - return False - - -def _has_vnni(): - arch = platform.machine() - # Only linux is supported for now. - if arch == "x86_64" and sys.platform.startswith("linux"): - with open("/proc/cpuinfo", "r") as content: - return "avx512_vnni" in content.read() - - return False - - -# check avx512 intrinsic groups for SkyLake X -def _has_slavx512(): - # Check LLVM support - llvm_version = tvm.target.codegen.llvm_version_major() - is_llvm_support = llvm_version >= 8 - arch = platform.machine() - # Only linux is supported for now. - if arch == "x86_64" and sys.platform.startswith("linux"): - with open("/proc/cpuinfo", "r") as content: - ctx = content.read() - check = ( - "avx512f" in ctx - and "avx512cd" in ctx - and "avx512bw" in ctx - and "avx512dq" in ctx - and "avx512vl" in ctx - ) - return check and is_llvm_support - - return False +requires_arm_dot = Feature( + "arm_dot", + "ARM dot product", + run_time_check=lambda: _has_cpu_feat("dotprod"), +) -requires_arm_dot = Feature("arm_dot", "ARM dot product", run_time_check=_arm_dot_supported) +requires_x86_vnni = Feature( + "x86_vnni", + "x86 VNNI Extensions", + run_time_check=lambda: (_has_cpu_feat("avx512vnni") or _has_cpu_feat("avxvnni")), +) -requires_cascadelake = Feature( - "cascadelake", "x86 CascadeLake", run_time_check=lambda: _has_vnni() and _is_intel() +requires_x86_avx512 = Feature( + "x86_avx512", + "x86 AVX512 Extensions", + run_time_check=lambda: _has_cpu_feat( + ["avx512bw", "avx512cd", "avx512dq", "avx512vl", "avx512f"] + ), ) -requires_skylake_avx512 = Feature( - "skylake_avx512", - "x86 SkyLake AVX512", - run_time_check=lambda: _has_slavx512() and _is_intel(), +requires_x86_amx = Feature( + "x86_amx", + "x86 AMX Extensions", + run_time_check=lambda: _has_cpu_feat("amx-int8"), ) diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc index 05a7df230f61..7878121410a2 100644 --- a/src/target/llvm/llvm_module.cc +++ b/src/target/llvm/llvm_module.cc @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -485,6 +486,30 @@ TVM_REGISTER_GLOBAL("target.llvm_get_intrinsic_name").set_body_typed([](int64_t #endif }); +TVM_REGISTER_GLOBAL("target.llvm_get_system_x86_vendor").set_body_typed([]() -> String { +#if TVM_LLVM_VERSION >= 120 +#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) + using namespace llvm::sys::detail::x86; + const auto x86_sign = getVendorSignature(); + if (x86_sign == VendorSignatures::GENUINE_INTEL) + return "intel"; + else if (x86_sign == VendorSignatures::AUTHENTIC_AMD) + return "amd"; + else if (x86_sign == VendorSignatures::UNKNOWN) + return "unknown"; +#endif +#endif + return "unimplemented"; +}); + +TVM_REGISTER_GLOBAL("target.llvm_get_system_triple").set_body_typed([]() -> String { + return llvm::sys::getDefaultTargetTriple(); +}); + +TVM_REGISTER_GLOBAL("target.llvm_get_system_cpu").set_body_typed([]() -> String { + return llvm::sys::getHostCPUName().str(); +}); + TVM_REGISTER_GLOBAL("target.llvm_get_targets").set_body_typed([]() -> Array { auto llvm_instance = std::make_unique(); LLVMTargetInfo llvm_backend(*llvm_instance, "llvm"); diff --git a/tests/python/contrib/test_amx.py b/tests/python/contrib/test_amx.py index 30da7e56fb8d..cd4f62cd62f6 100644 --- a/tests/python/contrib/test_amx.py +++ b/tests/python/contrib/test_amx.py @@ -27,8 +27,13 @@ import pytest -@tvm.testing.requires_llvm -@pytest.mark.skip("skip due to AMX feature not avaliable yet") +has_amx_runtime = pytest.mark.skipif( + not tvm.get_global_func("runtime.amx_init", True), reason="AMX runtime not available" +) + + +@has_amx_runtime +@tvm.testing.requires_x86_amx def test_amx_u8s8s32_matmul_tensorize(): m = 1024 k = 1024 @@ -113,8 +118,8 @@ def test_amx_u8s8s32_matmul_tensorize(): tvm.testing.assert_allclose(y.numpy(), np.dot(a.astype("int32"), b.T.astype("int32")), rtol=0) -@tvm.testing.requires_llvm -@pytest.mark.skip("skip due to AMX feature not avaliable yet") +@has_amx_runtime +@tvm.testing.requires_x86_amx def test_amx_check_support(): amx_init = tvm.get_global_func("runtime.amx_init") amx_tileconfig = tvm.get_global_func("runtime.amx_tileconfig") diff --git a/tests/python/contrib/test_gemm_acc32_vnni.py b/tests/python/contrib/test_gemm_acc32_vnni.py index c01f7758cb45..2e15d38612ce 100644 --- a/tests/python/contrib/test_gemm_acc32_vnni.py +++ b/tests/python/contrib/test_gemm_acc32_vnni.py @@ -97,7 +97,7 @@ def verify_fc_int8_acc32(m=1024, n=1024, k=1024, target="llvm -mcpu=cascadelake" # t_func.export_library("tensorize_acc32.o") -@tvm.testing.requires_cascadelake +@tvm.testing.requires_x86_vnni def test_fc_int8_acc32_vnni(): # For LLVM < 8.0, it shows "'cascadelake' is not a recognized processor for this target # (ignoring processor)" error with the following setting. After LLVM 8.0 is enabled in the @@ -105,7 +105,7 @@ def test_fc_int8_acc32_vnni(): verify_fc_int8_acc32() -@tvm.testing.requires_skylake_avx512 +@tvm.testing.requires_x86_avx512 def test_fc_int8_acc32_avx512(): verify_fc_int8_acc32(target="llvm -mcpu=skylake-avx512") diff --git a/tests/python/integration/test_auto_tensorize.py b/tests/python/integration/test_auto_tensorize.py index 8900a1ff6c8a..7831e5c8d72d 100644 --- a/tests/python/integration/test_auto_tensorize.py +++ b/tests/python/integration/test_auto_tensorize.py @@ -287,12 +287,12 @@ def _test_bert_int8(relay_mod, params, input_info, target, sch_rules, postprocs) print(runtime.benchmark(dev, number=1, repeat=50).mean) -@tvm.testing.requires_cascadelake +@tvm.testing.requires_x86_vnni def test_vnni_dense(): _test_dense("uint8", SCH_RULES_FOR_VNNI, POSTPROCS_FOR_VNNI, CASCADELAKE_VNNI_TARGET) -@tvm.testing.requires_skylake_avx512 +@tvm.testing.requires_x86_avx512 def test_avx512_dense(): _test_dense("uint8", SCH_RULES_FOR_AVX512, POSTPROCS_FOR_VNNI, SKYLAKE_AVX512_TARGET) @@ -310,12 +310,12 @@ def test_dp4a_dense(): # ) -@tvm.testing.requires_cascadelake +@tvm.testing.requires_x86_vnni def test_vnni_conv2d(): _test_conv2d("uint8", SCH_RULES_FOR_VNNI, POSTPROCS_FOR_VNNI, CASCADELAKE_VNNI_TARGET) -@tvm.testing.requires_skylake_avx512 +@tvm.testing.requires_x86_avx512 def test_avx512_conv2d(): _test_conv2d("uint8", SCH_RULES_FOR_AVX512, POSTPROCS_FOR_VNNI, SKYLAKE_AVX512_TARGET) @@ -333,7 +333,7 @@ def test_dp4a_conv2d(): # ) -@tvm.testing.requires_cascadelake +@tvm.testing.requires_x86_vnni @pytest.mark.skipif(tvm.testing.IS_IN_CI, reason="Slow on CI") def test_vnni_bert_int8(): pytest.importorskip("onnx") @@ -348,7 +348,7 @@ def test_vnni_bert_int8(): ) -@tvm.testing.requires_skylake_avx512 +@tvm.testing.requires_x86_avx512 @pytest.mark.skip("Due to quantized BERT download issue") def test_avx512_bert_int8(): relay_mod, params, input_info = load_quantized_bert_base() diff --git a/tests/python/relay/test_op_level1.py b/tests/python/relay/test_op_level1.py index e7def019239b..ca8ffda9ba59 100644 --- a/tests/python/relay/test_op_level1.py +++ b/tests/python/relay/test_op_level1.py @@ -846,13 +846,13 @@ def test_dense_amx_int8(): np.testing.assert_equal(out, ref) -@tvm.testing.requires_cascadelake +@tvm.testing.requires_x86_vnni @pytest.mark.parametrize("m,n,k", [(32, 128, 96), (32, 128, 97)]) def test_dense_vnni(m, n, k): dense_x86_test(m, n, k) -@tvm.testing.requires_skylake_avx512 +@tvm.testing.requires_x86_avx512 @pytest.mark.parametrize("m,n,k", [(32, 128, 96), (32, 128, 97)]) def test_dense_skylake_avx512(m, n, k): dense_x86_test(m, n, k, "llvm -mcpu=skylake-avx512", ["pmaddubs", "pmaddw", "vpaddd"]) diff --git a/tests/python/relay/test_op_level10.py b/tests/python/relay/test_op_level10.py index 9db1bcf78b2a..6036f707126b 100644 --- a/tests/python/relay/test_op_level10.py +++ b/tests/python/relay/test_op_level10.py @@ -568,7 +568,7 @@ def test_batch_matmul_amx(b, m, n, k): np.testing.assert_equal(out, ref) -@tvm.testing.requires_cascadelake +@tvm.testing.requires_x86_vnni @pytest.mark.parametrize( "b,m,n,k", [ @@ -581,7 +581,7 @@ def test_batch_matmul_vnni(b, m, n, k): batch_matmul_x86_test(b, m, n, k) -@tvm.testing.requires_skylake_avx512 +@tvm.testing.requires_x86_avx512 @pytest.mark.parametrize( "b,m,n,k", [ diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py index bd984d32e6bd..cb785021783d 100644 --- a/tests/python/relay/test_op_level2.py +++ b/tests/python/relay/test_op_level2.py @@ -2237,12 +2237,12 @@ def test_conv2d_int8_alter_dtype_arm(): ) -@tvm.testing.requires_cascadelake +@tvm.testing.requires_x86_vnni def test_conv2d_int8_alter_dtype_vnni(): _test_conv2d_int8_alter_dtype("int8", "llvm -mcpu=cascadelake", ["vpdpbusd"]) -@tvm.testing.requires_skylake_avx512 +@tvm.testing.requires_x86_avx512 def test_conv2d_int8_alter_dtype_avx512(): _test_conv2d_int8_alter_dtype( "int8", "llvm -mcpu=skylake-avx512", ["pmaddubs", "pmaddw", "vpaddd"] diff --git a/tests/python/target/test_llvm_features_info.py b/tests/python/target/test_llvm_features_info.py index 1be71331dda8..edcbc891c90d 100644 --- a/tests/python/target/test_llvm_features_info.py +++ b/tests/python/target/test_llvm_features_info.py @@ -30,7 +30,13 @@ def test_llvm_targets(): # check blank results assert len(codegen.llvm_get_targets()) + assert len(codegen.llvm_get_system_cpu()) + assert len(codegen.llvm_get_system_triple()) + assert len(codegen.llvm_get_system_x86_vendor()) # check ffi vs python + assert codegen.llvm_get_system_cpu() == _ffi_api.llvm_get_system_cpu() + assert codegen.llvm_get_system_triple() == _ffi_api.llvm_get_system_triple() + assert codegen.llvm_get_system_x86_vendor() == _ffi_api.llvm_get_system_x86_vendor() assert str(codegen.llvm_get_targets()) == str(_ffi_api.llvm_get_targets()) # check LLVM target -mcpu legality diff --git a/tests/python/unittest/test_meta_schedule_cpu_dot_product.py b/tests/python/unittest/test_meta_schedule_cpu_dot_product.py index 6dc72d69336f..592c772a04dd 100644 --- a/tests/python/unittest/test_meta_schedule_cpu_dot_product.py +++ b/tests/python/unittest/test_meta_schedule_cpu_dot_product.py @@ -165,13 +165,13 @@ def schedule_16x4_dense_fn_database(target, intrin, m=1024, n=1024, k=1024): f_check(lib, dev) -@tvm.testing.requires_cascadelake +@tvm.testing.requires_x86_vnni def test_vnni_schedule_fn_database(): target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=cascadelake -num-cores=4") schedule_16x4_dense_fn_database(target, VNNI_INTRIN) -@tvm.testing.requires_skylake_avx512 +@tvm.testing.requires_x86_avx512 def test_avx512_schedule_fn_database(): target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=skylake-avx512 -num-cores=4") schedule_16x4_dense_fn_database(target, AVX512_INTRIN, 16, 16, 16) @@ -255,13 +255,13 @@ def schedule_rule_dense_16x4(sch: Schedule, dense_block: BlockRV): f_check(lib, dev) -@tvm.testing.requires_cascadelake +@tvm.testing.requires_x86_vnni def test_vnni_schedule_fn_tune(): target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=cascadelake -num-cores=4") schedule_16x4_dense_fn_tune(target, VNNI_INTRIN) -@tvm.testing.requires_skylake_avx512 +@tvm.testing.requires_x86_avx512 def test_avx512_schedule_fn_tune(): target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=skylake-avx512 -num-cores=4") schedule_16x4_dense_fn_tune(target, AVX512_INTRIN, 16, 16, 16)