Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions python/tvm/target/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,48 @@ def llvm_get_intrinsic_name(intrin_id: int) -> str:
return _ffi_api.llvm_get_intrinsic_name(intrin_id)


def llvm_get_system_x86_vendor() -> str:
    """Get system x86 vendor info.

    Returns
    -------
    vendor : str
        The current system's cpu vendor, as reported by the
        ``llvm_get_system_x86_vendor`` FFI function: ``"intel"``, ``"amd"``
        or ``"unknown"``, and ``"unimplemented"`` when the vendor query is
        not compiled in.
    """
    return _ffi_api.llvm_get_system_x86_vendor()


def llvm_get_system_triple() -> str:
    """Get system host triple.

    Returns
    -------
    triple : str
        The current system's triple, as reported by the
        ``llvm_get_system_triple`` FFI function.
    """
    return _ffi_api.llvm_get_system_triple()


def llvm_get_system_cpu() -> str:
    """Get system host cpu name.

    Returns
    -------
    cpu_name : str
        The current system's cpu name, as reported by the
        ``llvm_get_system_cpu`` FFI function.
    """
    return _ffi_api.llvm_get_system_cpu()


def llvm_get_targets():
"""Get LLVM target list.

Expand Down
89 changes: 28 additions & 61 deletions python/tvm/testing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ def test_something():
import textwrap
import time
import shutil
import subprocess

from pathlib import Path
from typing import Optional, Callable, Union, List, Tuple
Expand All @@ -91,6 +90,7 @@ def test_something():
import tvm.te
import tvm._ffi

from tvm.target import codegen
from tvm.contrib import nvcc, cudnn, rocm
import tvm.contrib.hexagon._ci_env_check as hexagon
from tvm.driver.tvmc.frontends import load_model
Expand Down Expand Up @@ -1002,76 +1002,43 @@ def _corstone300_compile_time_check():
requires_vitis_ai = Feature("vitis_ai", "Vitis AI", cmake_flag="USE_VITIS_AI")


def _arm_dot_supported():
arch = platform.machine()
# check cpu features
def _has_cpu_feat(features):
cpu = codegen.llvm_get_system_cpu()
triple = codegen.llvm_get_system_triple()
target = "llvm -mtriple=%s -mcpu=%s" % (triple, cpu)
has_feat = codegen.target_has_features(features, tvm.target.Target(target))

if arch not in ["arm64", "aarch64"]:
return False
return has_feat

if sys.platform.startswith("darwin"):
cpu_info = subprocess.check_output("sysctl -a", shell=True).strip().decode()
for line in cpu_info.split("\n"):
if line.startswith("hw.optional.arm.FEAT_DotProd"):
return bool(int(line.split(":", 1)[1]))
elif sys.platform.startswith("linux"):
return True

return False


def _is_intel():
    """Report whether ``/proc/cpuinfo`` mentions "Intel".

    Returns ``False`` on every platform other than Linux, because the check
    relies on reading ``/proc/cpuinfo``.
    """
    # Only linux is supported for now.
    if not sys.platform.startswith("linux"):
        return False

    with open("/proc/cpuinfo", "r") as cpuinfo:
        return "Intel" in cpuinfo.read()


def _has_vnni():
    """Report whether ``/proc/cpuinfo`` lists the ``avx512_vnni`` flag.

    Returns ``False`` unless the machine is ``x86_64`` and the platform is
    Linux, because the check relies on reading ``/proc/cpuinfo``.
    """
    machine = platform.machine()
    # Only linux is supported for now.
    if machine != "x86_64" or not sys.platform.startswith("linux"):
        return False

    with open("/proc/cpuinfo", "r") as cpuinfo:
        cpu_text = cpuinfo.read()
    return "avx512_vnni" in cpu_text


# check avx512 intrinsic groups for SkyLake X
def _has_slavx512():
    """Report whether the host exposes the AVX512 flag group checked here.

    The result is ``True`` only when all of the following hold: the LLVM
    major version is at least 8, the machine is ``x86_64`` running Linux, and
    ``/proc/cpuinfo`` contains each of ``avx512f``, ``avx512cd``,
    ``avx512bw``, ``avx512dq`` and ``avx512vl``.
    """
    # Check LLVM support
    llvm_ok = tvm.target.codegen.llvm_version_major() >= 8
    machine = platform.machine()
    # Only linux is supported for now.
    if machine != "x86_64" or not sys.platform.startswith("linux"):
        return False

    required_flags = ("avx512f", "avx512cd", "avx512bw", "avx512dq", "avx512vl")
    with open("/proc/cpuinfo", "r") as cpuinfo:
        ctx = cpuinfo.read()
    return all(flag in ctx for flag in required_flags) and llvm_ok
# Test feature marker for ARM dot-product support. The run-time check calls
# `_has_cpu_feat("dotprod")`.
requires_arm_dot = Feature(
"arm_dot",
"ARM dot product",
run_time_check=lambda: _has_cpu_feat("dotprod"),
)


# Test feature marker for ARM dot-product support; the run-time check is
# delegated to `_arm_dot_supported`.
requires_arm_dot = Feature("arm_dot", "ARM dot product", run_time_check=_arm_dot_supported)
# Test feature marker for x86 VNNI. The run-time check passes when
# `_has_cpu_feat` reports either the `avx512vnni` or the `avxvnni` feature.
requires_x86_vnni = Feature(
"x86_vnni",
"x86 VNNI Extensions",
run_time_check=lambda: (_has_cpu_feat("avx512vnni") or _has_cpu_feat("avxvnni")),
)


requires_cascadelake = Feature(
"cascadelake", "x86 CascadeLake", run_time_check=lambda: _has_vnni() and _is_intel()
requires_x86_avx512 = Feature(
"x86_avx512",
"x86 AVX512 Extensions",
run_time_check=lambda: _has_cpu_feat(
["avx512bw", "avx512cd", "avx512dq", "avx512vl", "avx512f"]
),
)


requires_skylake_avx512 = Feature(
"skylake_avx512",
"x86 SkyLake AVX512",
run_time_check=lambda: _has_slavx512() and _is_intel(),
requires_x86_amx = Feature(
"x86_amx",
"x86 AMX Extensions",
run_time_check=lambda: _has_cpu_feat("amx-int8"),
)


Expand Down
25 changes: 25 additions & 0 deletions src/target/llvm/llvm_module.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include <llvm/IR/Module.h>
#include <llvm/IRReader/IRReader.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/Host.h>
#include <llvm/Support/SourceMgr.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Target/TargetMachine.h>
Expand Down Expand Up @@ -485,6 +486,30 @@ TVM_REGISTER_GLOBAL("target.llvm_get_intrinsic_name").set_body_typed([](int64_t
#endif
});

// Registers "target.llvm_get_system_x86_vendor": returns the host x86 vendor as
// "intel", "amd" or "unknown", or "unimplemented" when the query below is not
// compiled in.
TVM_REGISTER_GLOBAL("target.llvm_get_system_x86_vendor").set_body_typed([]() -> String {
// The vendor query is only compiled when TVM_LLVM_VERSION >= 120 ...
#if TVM_LLVM_VERSION >= 120
// ... and only when building for a 32- or 64-bit x86 target.
#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
using namespace llvm::sys::detail::x86;
const auto x86_sign = getVendorSignature();
if (x86_sign == VendorSignatures::GENUINE_INTEL)
return "intel";
else if (x86_sign == VendorSignatures::AUTHENTIC_AMD)
return "amd";
else if (x86_sign == VendorSignatures::UNKNOWN)
return "unknown";
#endif
#endif
// Reached when either guard above is false, or when no branch above matched.
return "unimplemented";
});

// Registers "target.llvm_get_system_triple": returns the string produced by
// llvm::sys::getDefaultTargetTriple().
TVM_REGISTER_GLOBAL("target.llvm_get_system_triple").set_body_typed([]() -> String {
  const std::string host_triple = llvm::sys::getDefaultTargetTriple();
  return host_triple;
});

// Registers "target.llvm_get_system_cpu": returns the name produced by
// llvm::sys::getHostCPUName(), converted to an owned string.
TVM_REGISTER_GLOBAL("target.llvm_get_system_cpu").set_body_typed([]() -> String {
  const llvm::StringRef host_cpu = llvm::sys::getHostCPUName();
  return host_cpu.str();
});

TVM_REGISTER_GLOBAL("target.llvm_get_targets").set_body_typed([]() -> Array<String> {
auto llvm_instance = std::make_unique<LLVMInstance>();
LLVMTargetInfo llvm_backend(*llvm_instance, "llvm");
Expand Down
13 changes: 9 additions & 4 deletions tests/python/contrib/test_amx.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,13 @@
import pytest


@tvm.testing.requires_llvm
@pytest.mark.skip("skip due to AMX feature not avaliable yet")
# Skip marker for tests that need the AMX runtime: the test is skipped when
# looking up the global function "runtime.amx_init" yields a falsy result.
# NOTE(review): the second argument (True) presumably lets a missing function
# return None instead of raising — confirm against `tvm.get_global_func`.
has_amx_runtime = pytest.mark.skipif(
not tvm.get_global_func("runtime.amx_init", True), reason="AMX runtime not available"
)


@has_amx_runtime
@tvm.testing.requires_x86_amx
def test_amx_u8s8s32_matmul_tensorize():
m = 1024
k = 1024
Expand Down Expand Up @@ -113,8 +118,8 @@ def test_amx_u8s8s32_matmul_tensorize():
tvm.testing.assert_allclose(y.numpy(), np.dot(a.astype("int32"), b.T.astype("int32")), rtol=0)


@tvm.testing.requires_llvm
@pytest.mark.skip("skip due to AMX feature not avaliable yet")
@has_amx_runtime
@tvm.testing.requires_x86_amx
def test_amx_check_support():
amx_init = tvm.get_global_func("runtime.amx_init")
amx_tileconfig = tvm.get_global_func("runtime.amx_tileconfig")
Expand Down
4 changes: 2 additions & 2 deletions tests/python/contrib/test_gemm_acc32_vnni.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,15 @@ def verify_fc_int8_acc32(m=1024, n=1024, k=1024, target="llvm -mcpu=cascadelake"
# t_func.export_library("tensorize_acc32.o")


@tvm.testing.requires_cascadelake
@tvm.testing.requires_x86_vnni
def test_fc_int8_acc32_vnni():
# For LLVM < 8.0, it shows "'cascadelake' is not a recognized processor for this target
# (ignoring processor)" error with the following setting. After LLVM 8.0 is enabled in the
# test, we should use cascadelake setting.
verify_fc_int8_acc32()


@tvm.testing.requires_skylake_avx512
@tvm.testing.requires_x86_avx512
def test_fc_int8_acc32_avx512():
verify_fc_int8_acc32(target="llvm -mcpu=skylake-avx512")

Expand Down
12 changes: 6 additions & 6 deletions tests/python/integration/test_auto_tensorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,12 +287,12 @@ def _test_bert_int8(relay_mod, params, input_info, target, sch_rules, postprocs)
print(runtime.benchmark(dev, number=1, repeat=50).mean)


@tvm.testing.requires_cascadelake
@tvm.testing.requires_x86_vnni
def test_vnni_dense():
_test_dense("uint8", SCH_RULES_FOR_VNNI, POSTPROCS_FOR_VNNI, CASCADELAKE_VNNI_TARGET)


@tvm.testing.requires_skylake_avx512
@tvm.testing.requires_x86_avx512
def test_avx512_dense():
_test_dense("uint8", SCH_RULES_FOR_AVX512, POSTPROCS_FOR_VNNI, SKYLAKE_AVX512_TARGET)

Expand All @@ -310,12 +310,12 @@ def test_dp4a_dense():
# )


@tvm.testing.requires_cascadelake
@tvm.testing.requires_x86_vnni
def test_vnni_conv2d():
_test_conv2d("uint8", SCH_RULES_FOR_VNNI, POSTPROCS_FOR_VNNI, CASCADELAKE_VNNI_TARGET)


@tvm.testing.requires_skylake_avx512
@tvm.testing.requires_x86_avx512
def test_avx512_conv2d():
_test_conv2d("uint8", SCH_RULES_FOR_AVX512, POSTPROCS_FOR_VNNI, SKYLAKE_AVX512_TARGET)

Expand All @@ -333,7 +333,7 @@ def test_dp4a_conv2d():
# )


@tvm.testing.requires_cascadelake
@tvm.testing.requires_x86_vnni
@pytest.mark.skipif(tvm.testing.IS_IN_CI, reason="Slow on CI")
def test_vnni_bert_int8():
pytest.importorskip("onnx")
Expand All @@ -348,7 +348,7 @@ def test_vnni_bert_int8():
)


@tvm.testing.requires_skylake_avx512
@tvm.testing.requires_x86_avx512
@pytest.mark.skip("Due to quantized BERT download issue")
def test_avx512_bert_int8():
relay_mod, params, input_info = load_quantized_bert_base()
Expand Down
4 changes: 2 additions & 2 deletions tests/python/relay/test_op_level1.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,13 +846,13 @@ def test_dense_amx_int8():
np.testing.assert_equal(out, ref)


@tvm.testing.requires_cascadelake
@tvm.testing.requires_x86_vnni
@pytest.mark.parametrize("m,n,k", [(32, 128, 96), (32, 128, 97)])
def test_dense_vnni(m, n, k):
dense_x86_test(m, n, k)


@tvm.testing.requires_skylake_avx512
@tvm.testing.requires_x86_avx512
@pytest.mark.parametrize("m,n,k", [(32, 128, 96), (32, 128, 97)])
def test_dense_skylake_avx512(m, n, k):
dense_x86_test(m, n, k, "llvm -mcpu=skylake-avx512", ["pmaddubs", "pmaddw", "vpaddd"])
Expand Down
4 changes: 2 additions & 2 deletions tests/python/relay/test_op_level10.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ def test_batch_matmul_amx(b, m, n, k):
np.testing.assert_equal(out, ref)


@tvm.testing.requires_cascadelake
@tvm.testing.requires_x86_vnni
@pytest.mark.parametrize(
"b,m,n,k",
[
Expand All @@ -581,7 +581,7 @@ def test_batch_matmul_vnni(b, m, n, k):
batch_matmul_x86_test(b, m, n, k)


@tvm.testing.requires_skylake_avx512
@tvm.testing.requires_x86_avx512
@pytest.mark.parametrize(
"b,m,n,k",
[
Expand Down
4 changes: 2 additions & 2 deletions tests/python/relay/test_op_level2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2237,12 +2237,12 @@ def test_conv2d_int8_alter_dtype_arm():
)


@tvm.testing.requires_cascadelake
@tvm.testing.requires_x86_vnni
def test_conv2d_int8_alter_dtype_vnni():
_test_conv2d_int8_alter_dtype("int8", "llvm -mcpu=cascadelake", ["vpdpbusd"])


@tvm.testing.requires_skylake_avx512
@tvm.testing.requires_x86_avx512
def test_conv2d_int8_alter_dtype_avx512():
_test_conv2d_int8_alter_dtype(
"int8", "llvm -mcpu=skylake-avx512", ["pmaddubs", "pmaddw", "vpaddd"]
Expand Down
6 changes: 6 additions & 0 deletions tests/python/target/test_llvm_features_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,13 @@ def test_llvm_targets():

# check blank results
assert len(codegen.llvm_get_targets())
assert len(codegen.llvm_get_system_cpu())
assert len(codegen.llvm_get_system_triple())
assert len(codegen.llvm_get_system_x86_vendor())
# check ffi vs python
assert codegen.llvm_get_system_cpu() == _ffi_api.llvm_get_system_cpu()
assert codegen.llvm_get_system_triple() == _ffi_api.llvm_get_system_triple()
assert codegen.llvm_get_system_x86_vendor() == _ffi_api.llvm_get_system_x86_vendor()
assert str(codegen.llvm_get_targets()) == str(_ffi_api.llvm_get_targets())

# check LLVM target -mcpu legality
Expand Down
8 changes: 4 additions & 4 deletions tests/python/unittest/test_meta_schedule_cpu_dot_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,13 @@ def schedule_16x4_dense_fn_database(target, intrin, m=1024, n=1024, k=1024):
f_check(lib, dev)


@tvm.testing.requires_cascadelake
@tvm.testing.requires_x86_vnni
def test_vnni_schedule_fn_database():
target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=cascadelake -num-cores=4")
schedule_16x4_dense_fn_database(target, VNNI_INTRIN)


@tvm.testing.requires_skylake_avx512
@tvm.testing.requires_x86_avx512
def test_avx512_schedule_fn_database():
target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=skylake-avx512 -num-cores=4")
schedule_16x4_dense_fn_database(target, AVX512_INTRIN, 16, 16, 16)
Expand Down Expand Up @@ -255,13 +255,13 @@ def schedule_rule_dense_16x4(sch: Schedule, dense_block: BlockRV):
f_check(lib, dev)


@tvm.testing.requires_cascadelake
@tvm.testing.requires_x86_vnni
def test_vnni_schedule_fn_tune():
target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=cascadelake -num-cores=4")
schedule_16x4_dense_fn_tune(target, VNNI_INTRIN)


@tvm.testing.requires_skylake_avx512
@tvm.testing.requires_x86_avx512
def test_avx512_schedule_fn_tune():
target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=skylake-avx512 -num-cores=4")
schedule_16x4_dense_fn_tune(target, AVX512_INTRIN, 16, 16, 16)
Expand Down