apache · masahi · Oct 12, 2023 · Oct 10, 2023
diff --git a/python/tvm/target/codegen.py b/python/tvm/target/codegen.py
@@ -96,6 +96,48 @@ def llvm_get_intrinsic_name(intrin_id: int) -> str:
     return _ffi_api.llvm_get_intrinsic_name(intrin_id)
 
 
+def llvm_get_system_x86_vendor() -> str:
+    """Get system x86 vendor info.
+
+    Takes no arguments; the vendor is detected by LLVM on the running machine.
+
+    Returns
+    -------
+    vendor : str
+        "intel", "amd" or "unknown" on an x86 build with LLVM >= 12,
+        "unimplemented" otherwise.
+    """
+    return _ffi_api.llvm_get_system_x86_vendor()
+
+
+def llvm_get_system_triple() -> str:
+    """Get system host triple.
+
+    Takes no arguments; the value is what LLVM reports as its default target
+    triple.
+
+    Returns
+    -------
+    triple : str
+        The current system's triple.
+    """
+    return _ffi_api.llvm_get_system_triple()
+
+
+def llvm_get_system_cpu() -> str:
+    """Get system host cpu name.
+
+    Takes no arguments; the value is the host cpu name as detected by LLVM
+    (``llvm::sys::getHostCPUName``).
+
+    Returns
+    -------
+    cpu_name : str
+        The current system's cpu name.
+    """
+    return _ffi_api.llvm_get_system_cpu()
+
+
 def llvm_get_targets():
     """Get LLVM target list.
 

diff --git a/python/tvm/testing/utils.py b/python/tvm/testing/utils.py
@@ -77,7 +77,6 @@ def test_something():
 import textwrap
 import time
 import shutil
-import subprocess
 
 from pathlib import Path
 from typing import Optional, Callable, Union, List, Tuple
@@ -91,6 +90,7 @@ def test_something():
 import tvm.te
 import tvm._ffi
 
+from tvm.target import codegen
 from tvm.contrib import nvcc, cudnn, rocm
 import tvm.contrib.hexagon._ci_env_check as hexagon
 from tvm.driver.tvmc.frontends import load_model
@@ -1002,76 +1002,43 @@ def _corstone300_compile_time_check():
 requires_vitis_ai = Feature("vitis_ai", "Vitis AI", cmake_flag="USE_VITIS_AI")
 
 
-def _arm_dot_supported():
-    arch = platform.machine()
+def _has_cpu_feat(features):
+    """Tell whether the host cpu, as seen by LLVM, has the given feature(s)."""
+    host_cpu = codegen.llvm_get_system_cpu()
+    host_triple = codegen.llvm_get_system_triple()
+    host_target = tvm.target.Target("llvm -mtriple=%s -mcpu=%s" % (host_triple, host_cpu))
+    has_feat = codegen.target_has_features(features, host_target)
 
-    if arch not in ["arm64", "aarch64"]:
-        return False
+    return has_feat
 
-    if sys.platform.startswith("darwin"):
-        cpu_info = subprocess.check_output("sysctl -a", shell=True).strip().decode()
-        for line in cpu_info.split("\n"):
-            if line.startswith("hw.optional.arm.FEAT_DotProd"):
-                return bool(int(line.split(":", 1)[1]))
-    elif sys.platform.startswith("linux"):
-        return True
 
-    return False
-
-
-def _is_intel():
-    # Only linux is supported for now.
-    if sys.platform.startswith("linux"):
-        with open("/proc/cpuinfo", "r") as content:
-            return "Intel" in content.read()
-
-    return False
-
-
-def _has_vnni():
-    arch = platform.machine()
-    # Only linux is supported for now.
-    if arch == "x86_64" and sys.platform.startswith("linux"):
-        with open("/proc/cpuinfo", "r") as content:
-            return "avx512_vnni" in content.read()
-
-    return False
-
-
-# check avx512 intrinsic groups for SkyLake X
-def _has_slavx512():
-    # Check LLVM support
-    llvm_version = tvm.target.codegen.llvm_version_major()
-    is_llvm_support = llvm_version >= 8
-    arch = platform.machine()
-    # Only linux is supported for now.
-    if arch == "x86_64" and sys.platform.startswith("linux"):
-        with open("/proc/cpuinfo", "r") as content:
-            ctx = content.read()
-            check = (
-                "avx512f" in ctx
-                and "avx512cd" in ctx
-                and "avx512bw" in ctx
-                and "avx512dq" in ctx
-                and "avx512vl" in ctx
-            )
-            return check and is_llvm_support
-
-    return False
+requires_arm_dot = Feature(
+    "arm_dot",
+    "ARM dot product",
+    run_time_check=lambda: _has_cpu_feat("dotprod"),  # asks LLVM about the host cpu
+)
 
 
-requires_arm_dot = Feature("arm_dot", "ARM dot product", run_time_check=_arm_dot_supported)
+requires_x86_vnni = Feature(
+    "x86_vnni",
+    "x86 VNNI Extensions",  # AVX512-VNNI only: the gated tests compile for -mcpu=cascadelake
+    run_time_check=lambda: _has_cpu_feat("avx512vnni"),
+)
 
 
-requires_cascadelake = Feature(
-    "cascadelake", "x86 CascadeLake", run_time_check=lambda: _has_vnni() and _is_intel()
+requires_x86_avx512 = Feature(
+    "x86_avx512",
+    "x86 AVX512 Extensions",
+    run_time_check=lambda: _has_cpu_feat(  # presumably needs all five features; TODO confirm
+        ["avx512bw", "avx512cd", "avx512dq", "avx512vl", "avx512f"]
+    ),
 )
 
 
-requires_skylake_avx512 = Feature(
-    "skylake_avx512",
-    "x86 SkyLake AVX512",
-    run_time_check=lambda: _has_slavx512() and _is_intel(),
+requires_x86_amx = Feature(
+    "x86_amx",
+    "x86 AMX Extensions",
+    run_time_check=lambda: _has_cpu_feat("amx-int8"),  # only the int8 AMX feature is queried
 )
 
 

diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc
@@ -41,6 +41,7 @@
 #include <llvm/IR/Module.h>
 #include <llvm/IRReader/IRReader.h>
 #include <llvm/Support/FileSystem.h>
+#include <llvm/Support/Host.h>
 #include <llvm/Support/SourceMgr.h>
 #include <llvm/Support/raw_ostream.h>
 #include <llvm/Target/TargetMachine.h>
@@ -485,6 +486,30 @@ TVM_REGISTER_GLOBAL("target.llvm_get_intrinsic_name").set_body_typed([](int64_t
 #endif
 });
 
+TVM_REGISTER_GLOBAL("target.llvm_get_system_x86_vendor").set_body_typed([]() -> String {
+#if TVM_LLVM_VERSION >= 120
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+  switch (llvm::sys::detail::x86::getVendorSignature()) {
+    case llvm::sys::detail::x86::VendorSignatures::GENUINE_INTEL:
+      return "intel";
+    case llvm::sys::detail::x86::VendorSignatures::AUTHENTIC_AMD:
+      return "amd";
+    case llvm::sys::detail::x86::VendorSignatures::UNKNOWN:
+      return "unknown";
+  }
+#endif  // x86 build
+#endif  // TVM_LLVM_VERSION >= 120
+  return "unimplemented";  // vendor query not compiled in, or signature not mapped above
+});
+
+TVM_REGISTER_GLOBAL("target.llvm_get_system_triple").set_body_typed([]() -> String {
+  return String(llvm::sys::getDefaultTargetTriple());  // LLVM's default target triple
+});
+
+TVM_REGISTER_GLOBAL("target.llvm_get_system_cpu").set_body_typed([]() -> String {
+  return String(llvm::sys::getHostCPUName().str());  // copy the name into an owning string
+});
+
 TVM_REGISTER_GLOBAL("target.llvm_get_targets").set_body_typed([]() -> Array<String> {
   auto llvm_instance = std::make_unique<LLVMInstance>();
   LLVMTargetInfo llvm_backend(*llvm_instance, "llvm");

diff --git a/tests/python/contrib/test_amx.py b/tests/python/contrib/test_amx.py
@@ -27,8 +27,13 @@
 import pytest
 
 
-@tvm.testing.requires_llvm
-@pytest.mark.skip("skip due to AMX feature not avaliable yet")
+has_amx_runtime = pytest.mark.skipif(  # skip when the "runtime.amx_init" lookup is falsy
+    not tvm.get_global_func("runtime.amx_init", True), reason="AMX runtime not available"
+)
+
+
+@has_amx_runtime
+@tvm.testing.requires_x86_amx
 def test_amx_u8s8s32_matmul_tensorize():
     m = 1024
     k = 1024
@@ -113,8 +118,8 @@ def test_amx_u8s8s32_matmul_tensorize():
     tvm.testing.assert_allclose(y.numpy(), np.dot(a.astype("int32"), b.T.astype("int32")), rtol=0)
 
 
-@tvm.testing.requires_llvm
-@pytest.mark.skip("skip due to AMX feature not avaliable yet")
+@has_amx_runtime
+@tvm.testing.requires_x86_amx
 def test_amx_check_support():
     amx_init = tvm.get_global_func("runtime.amx_init")
     amx_tileconfig = tvm.get_global_func("runtime.amx_tileconfig")

diff --git a/tests/python/contrib/test_gemm_acc32_vnni.py b/tests/python/contrib/test_gemm_acc32_vnni.py
@@ -97,15 +97,15 @@ def verify_fc_int8_acc32(m=1024, n=1024, k=1024, target="llvm -mcpu=cascadelake"
     # t_func.export_library("tensorize_acc32.o")
 
 
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
 def test_fc_int8_acc32_vnni():
     # For LLVM < 8.0, it shows "'cascadelake' is not a recognized processor for this target
     # (ignoring processor)" error with the following setting. After LLVM 8.0 is enabled in the
     # test, we should use cascadelake setting.
     verify_fc_int8_acc32()
 
 
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
 def test_fc_int8_acc32_avx512():
     verify_fc_int8_acc32(target="llvm -mcpu=skylake-avx512")
 

diff --git a/tests/python/integration/test_auto_tensorize.py b/tests/python/integration/test_auto_tensorize.py
@@ -287,12 +287,12 @@ def _test_bert_int8(relay_mod, params, input_info, target, sch_rules, postprocs)
     print(runtime.benchmark(dev, number=1, repeat=50).mean)
 
 
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
 def test_vnni_dense():
     _test_dense("uint8", SCH_RULES_FOR_VNNI, POSTPROCS_FOR_VNNI, CASCADELAKE_VNNI_TARGET)
 
 
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
 def test_avx512_dense():
     _test_dense("uint8", SCH_RULES_FOR_AVX512, POSTPROCS_FOR_VNNI, SKYLAKE_AVX512_TARGET)
 
@@ -310,12 +310,12 @@ def test_dp4a_dense():
     # )
 
 
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
 def test_vnni_conv2d():
     _test_conv2d("uint8", SCH_RULES_FOR_VNNI, POSTPROCS_FOR_VNNI, CASCADELAKE_VNNI_TARGET)
 
 
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
 def test_avx512_conv2d():
     _test_conv2d("uint8", SCH_RULES_FOR_AVX512, POSTPROCS_FOR_VNNI, SKYLAKE_AVX512_TARGET)
 
@@ -333,7 +333,7 @@ def test_dp4a_conv2d():
     # )
 
 
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
 @pytest.mark.skipif(tvm.testing.IS_IN_CI, reason="Slow on CI")
 def test_vnni_bert_int8():
     pytest.importorskip("onnx")
@@ -348,7 +348,7 @@ def test_vnni_bert_int8():
     )
 
 
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
 @pytest.mark.skip("Due to quantized BERT download issue")
 def test_avx512_bert_int8():
     relay_mod, params, input_info = load_quantized_bert_base()

diff --git a/tests/python/relay/test_op_level1.py b/tests/python/relay/test_op_level1.py
@@ -846,13 +846,13 @@ def test_dense_amx_int8():
         np.testing.assert_equal(out, ref)
 
 
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
 @pytest.mark.parametrize("m,n,k", [(32, 128, 96), (32, 128, 97)])
 def test_dense_vnni(m, n, k):
     dense_x86_test(m, n, k)
 
 
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
 @pytest.mark.parametrize("m,n,k", [(32, 128, 96), (32, 128, 97)])
 def test_dense_skylake_avx512(m, n, k):
     dense_x86_test(m, n, k, "llvm -mcpu=skylake-avx512", ["pmaddubs", "pmaddw", "vpaddd"])

diff --git a/tests/python/relay/test_op_level10.py b/tests/python/relay/test_op_level10.py
@@ -568,7 +568,7 @@ def test_batch_matmul_amx(b, m, n, k):
         np.testing.assert_equal(out, ref)
 
 
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
 @pytest.mark.parametrize(
     "b,m,n,k",
     [
@@ -581,7 +581,7 @@ def test_batch_matmul_vnni(b, m, n, k):
     batch_matmul_x86_test(b, m, n, k)
 
 
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
 @pytest.mark.parametrize(
     "b,m,n,k",
     [

diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py
@@ -2237,12 +2237,12 @@ def test_conv2d_int8_alter_dtype_arm():
     )
 
 
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
 def test_conv2d_int8_alter_dtype_vnni():
     _test_conv2d_int8_alter_dtype("int8", "llvm -mcpu=cascadelake", ["vpdpbusd"])
 
 
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
 def test_conv2d_int8_alter_dtype_avx512():
     _test_conv2d_int8_alter_dtype(
         "int8", "llvm -mcpu=skylake-avx512", ["pmaddubs", "pmaddw", "vpaddd"]

diff --git a/tests/python/target/test_llvm_features_info.py b/tests/python/target/test_llvm_features_info.py
@@ -30,7 +30,13 @@ def test_llvm_targets():
 
     # check blank results
     assert len(codegen.llvm_get_targets())
+    assert len(codegen.llvm_get_system_cpu())
+    assert len(codegen.llvm_get_system_triple())
+    assert len(codegen.llvm_get_system_x86_vendor())
     # check ffi vs python
+    assert codegen.llvm_get_system_cpu() == _ffi_api.llvm_get_system_cpu()
+    assert codegen.llvm_get_system_triple() == _ffi_api.llvm_get_system_triple()
+    assert codegen.llvm_get_system_x86_vendor() == _ffi_api.llvm_get_system_x86_vendor()
     assert str(codegen.llvm_get_targets()) == str(_ffi_api.llvm_get_targets())
 
     # check LLVM target -mcpu legality

diff --git a/tests/python/unittest/test_meta_schedule_cpu_dot_product.py b/tests/python/unittest/test_meta_schedule_cpu_dot_product.py
@@ -165,13 +165,13 @@ def schedule_16x4_dense_fn_database(target, intrin, m=1024, n=1024, k=1024):
     f_check(lib, dev)
 
 
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
 def test_vnni_schedule_fn_database():
     target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=cascadelake -num-cores=4")
     schedule_16x4_dense_fn_database(target, VNNI_INTRIN)
 
 
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
 def test_avx512_schedule_fn_database():
     target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=skylake-avx512 -num-cores=4")
     schedule_16x4_dense_fn_database(target, AVX512_INTRIN, 16, 16, 16)
@@ -255,13 +255,13 @@ def schedule_rule_dense_16x4(sch: Schedule, dense_block: BlockRV):
     f_check(lib, dev)
 
 
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
 def test_vnni_schedule_fn_tune():
     target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=cascadelake -num-cores=4")
     schedule_16x4_dense_fn_tune(target, VNNI_INTRIN)
 
 
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
 def test_avx512_schedule_fn_tune():
     target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=skylake-avx512 -num-cores=4")
     schedule_16x4_dense_fn_tune(target, AVX512_INTRIN, 16, 16, 16)