From 15c95d0ba78587b1a1741397440570fab0af4df1 Mon Sep 17 00:00:00 2001 From: John Lawson Date: Mon, 8 Mar 2021 15:05:50 +0000 Subject: [PATCH 1/8] Make declare_intrin_overload return LLVM function --- src/CodeGen_ARM.cpp | 2 ++ src/CodeGen_LLVM.cpp | 7 ++----- src/CodeGen_LLVM.h | 2 +- src/CodeGen_PTX_Dev.cpp | 31 +++++++++++++++++++++++-------- src/CodeGen_PowerPC.cpp | 4 +++- src/CodeGen_WebAssembly.cpp | 4 +++- src/CodeGen_X86.cpp | 4 +++- 7 files changed, 37 insertions(+), 17 deletions(-) diff --git a/src/CodeGen_ARM.cpp b/src/CodeGen_ARM.cpp index e43097b95180..2bbdc925088c 100644 --- a/src/CodeGen_ARM.cpp +++ b/src/CodeGen_ARM.cpp @@ -671,6 +671,8 @@ void CodeGen_ARM::init_module() { intrin_impl = get_llvm_intrin(ret_type, mangled_name, arg_types, scalars_are_vectors); } + intrin_impl->addFnAttr(llvm::Attribute::AttrKind::ReadNone); + intrin_impl->addFnAttr(llvm::Attribute::AttrKind::NoUnwind); declare_intrin_overload(intrin.name, ret_type, intrin_impl, arg_types); if (intrin.flags & ArmIntrinsic::AllowUnsignedOp1) { // Also generate a version of this intrinsic where the second operand is unsigned. diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp index eda89aca82da..c15166bd00d8 100644 --- a/src/CodeGen_LLVM.cpp +++ b/src/CodeGen_LLVM.cpp @@ -4661,10 +4661,11 @@ llvm::Function *CodeGen_LLVM::get_llvm_intrin(const Type &ret_type, const std::s return get_llvm_intrin(llvm_ret_type, name, llvm_arg_types); } -void CodeGen_LLVM::declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector arg_types, bool scalars_are_vectors) { +llvm::Function* CodeGen_LLVM::declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector arg_types, bool scalars_are_vectors) { llvm::Function *intrin = get_llvm_intrin(ret_type, impl_name, arg_types, scalars_are_vectors); internal_assert(intrin); intrinsics[name].emplace_back(ret_type, std::move(arg_types), intrin); + return intrin; } void CodeGen_LLVM::declare_intrin_overload(const std::string &name, const Type &ret_type, llvm::Function *impl, std::vector arg_types) { @@ -4893,10 +4894,6 @@ Value *CodeGen_LLVM::call_intrin(llvm::Type *result_type, int intrin_lanes, } CallInst *call = builder->CreateCall(intrin, arg_values); - - call->setDoesNotAccessMemory(); - call->setDoesNotThrow(); - return call; } diff --git a/src/CodeGen_LLVM.h b/src/CodeGen_LLVM.h index 092bc7713b5b..09f2c68b8d6e 100644 --- a/src/CodeGen_LLVM.h +++ b/src/CodeGen_LLVM.h @@ -446,7 +446,7 @@ class CodeGen_LLVM : public IRVisitor { llvm::Function *get_llvm_intrin(const Type &ret_type, const std::string &name, const std::vector &arg_types, bool scalars_are_vectors = false); llvm::Function *get_llvm_intrin(llvm::Type *ret_type, const std::string &name, const std::vector &arg_types); /** Declare an intrinsic function that participates in overload resolution. */ - void declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector arg_types, bool scalars_are_vectors = false); + llvm::Function* declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector arg_types, bool scalars_are_vectors = false); void declare_intrin_overload(const std::string &name, const Type &ret_type, llvm::Function *impl, std::vector arg_types); /** Call an overloaded intrinsic function. Returns nullptr if no suitable overload is found. */ llvm::Value *call_overloaded_intrin(const Type &result_type, const std::string &name, const std::vector &args); diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index 26822cda2296..37faf278355c 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -228,14 +228,29 @@ void CodeGen_PTX_Dev::init_module() { module = get_initial_module_for_ptx_device(target, context); - declare_intrin_overload("dp4a", Int(32), "dp4a_s32_s32", {Int(8, 4), Int(8, 4), Int(32)}); - declare_intrin_overload("dp4a", Int(32), "dp4a_s32_u32", {Int(8, 4), UInt(8, 4), Int(32)}); - declare_intrin_overload("dp4a", Int(32), "dp4a_u32_s32", {UInt(8, 4), Int(8, 4), Int(32)}); - declare_intrin_overload("dp4a", UInt(32), "dp4a_u32_u32", {UInt(8, 4), UInt(8, 4), UInt(32)}); - declare_intrin_overload("dp2a", Int(32), "dp2a_s32_s32", {Int(16, 4), Int(8, 4), Int(32)}); - declare_intrin_overload("dp2a", Int(32), "dp2a_s32_u32", {Int(16, 4), UInt(8, 4), Int(32)}); - declare_intrin_overload("dp2a", Int(32), "dp2a_u32_s32", {UInt(16, 4), Int(8, 4), Int(32)}); - declare_intrin_overload("dp2a", UInt(32), "dp2a_u32_u32", {UInt(16, 4), UInt(8, 4), UInt(32)}); + struct Intrinsic { + const char *intrin; + Type ret; + const char *name; + std::initializer_list args; + }; + + Intrinsic ptx_intrins[] = { + {"dp4a", Int(32), "dp4a_s32_s32", {Int(8, 4), Int(8, 4), Int(32)}}, + {"dp4a", Int(32), "dp4a_s32_u32", {Int(8, 4), UInt(8, 4), Int(32)}}, + {"dp4a", Int(32), "dp4a_u32_s32", {UInt(8, 4), Int(8, 4), Int(32)}}, + {"dp4a", UInt(32), "dp4a_u32_u32", {UInt(8, 4), UInt(8, 4), UInt(32)}}, + {"dp2a", Int(32), "dp2a_s32_s32", {Int(16, 4), Int(8, 4), Int(32)}}, + {"dp2a", Int(32), "dp2a_s32_u32", {Int(16, 4), UInt(8, 4), Int(32)}}, + {"dp2a", Int(32), "dp2a_u32_s32", {UInt(16, 4), Int(8, 4), Int(32)}}, + {"dp2a", UInt(32), "dp2a_u32_u32", {UInt(16, 4), UInt(8, 4), UInt(32)}}, + }; + + for (auto &&i : ptx_intrins) { + auto fn = declare_intrin_overload(i.intrin, std::move(i.ret), i.name, std::move(i.args)); + fn->addFnAttr(llvm::Attribute::AttrKind::ReadNone); + fn->addFnAttr(llvm::Attribute::AttrKind::NoUnwind); + } } void CodeGen_PTX_Dev::visit(const Call *op) { diff --git a/src/CodeGen_PowerPC.cpp b/src/CodeGen_PowerPC.cpp index 312649280f62..b2f782936627 100644 --- a/src/CodeGen_PowerPC.cpp +++ b/src/CodeGen_PowerPC.cpp @@ -113,7 +113,9 @@ void CodeGen_PowerPC::init_module() { arg_types.emplace_back(j); } - declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); + auto fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); + fn->addFnAttr(llvm::Attribute::AttrKind::ReadNone); + fn->addFnAttr(llvm::Attribute::AttrKind::NoUnwind); } } diff --git a/src/CodeGen_WebAssembly.cpp b/src/CodeGen_WebAssembly.cpp index fe7ee2fe6db0..39a82cb545be 100644 --- a/src/CodeGen_WebAssembly.cpp +++ b/src/CodeGen_WebAssembly.cpp @@ -87,7 +87,9 @@ void CodeGen_WebAssembly::init_module() { arg_types.emplace_back(i); } - declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); + auto fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); + fn->addFnAttr(llvm::Attribute::AttrKind::ReadNone); + fn->addFnAttr(llvm::Attribute::AttrKind::NoUnwind); } } diff --git a/src/CodeGen_X86.cpp b/src/CodeGen_X86.cpp index fbfc4881e70d..22ea1adb6e56 100644 --- a/src/CodeGen_X86.cpp +++ b/src/CodeGen_X86.cpp @@ -205,7 +205,9 @@ void CodeGen_X86::init_module() { arg_types.emplace_back(j); } - declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); + auto fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); + fn->addFnAttr(llvm::Attribute::AttrKind::ReadNone); + fn->addFnAttr(llvm::Attribute::AttrKind::NoUnwind); } } From 23f995fd312dad3bf31c23b80d9f919350b51931 Mon Sep 17 00:00:00 2001 From: John Lawson Date: Tue, 9 Mar 2021 10:43:33 +0000 Subject: [PATCH 2/8] Make names same as elsewhere --- src/CodeGen_PTX_Dev.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index 37faf278355c..4e1075d3b68a 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -229,10 +229,10 @@ void CodeGen_PTX_Dev::init_module() { module = get_initial_module_for_ptx_device(target, context); struct Intrinsic { - const char *intrin; - Type ret; const char *name; - std::initializer_list args; + Type ret_type; + const char *intrin_name; + vector arg_types; }; Intrinsic ptx_intrins[] = { @@ -247,7 +247,7 @@ void CodeGen_PTX_Dev::init_module() { }; for (auto &&i : ptx_intrins) { - auto fn = declare_intrin_overload(i.intrin, std::move(i.ret), i.name, std::move(i.args)); + auto fn = declare_intrin_overload(i.name, i.ret_type, i.intrin_name, std::move(i.arg_types)); fn->addFnAttr(llvm::Attribute::AttrKind::ReadNone); fn->addFnAttr(llvm::Attribute::AttrKind::NoUnwind); } From 840239ce10fbc44a25ac6868bfd76799961960e8 Mon Sep 17 00:00:00 2001 From: John Lawson Date: Tue, 9 Mar 2021 11:22:07 +0000 Subject: [PATCH 3/8] Remove unneeded enum name --- src/CodeGen_ARM.cpp | 4 ++-- src/CodeGen_PTX_Dev.cpp | 4 ++-- src/CodeGen_PowerPC.cpp | 6 ++++-- src/CodeGen_WebAssembly.cpp | 4 ++-- src/CodeGen_X86.cpp | 4 ++-- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/CodeGen_ARM.cpp b/src/CodeGen_ARM.cpp index 2bbdc925088c..c79301161b93 100644 --- a/src/CodeGen_ARM.cpp +++ b/src/CodeGen_ARM.cpp @@ -671,8 +671,8 @@ void CodeGen_ARM::init_module() { intrin_impl = get_llvm_intrin(ret_type, mangled_name, arg_types, scalars_are_vectors); } - intrin_impl->addFnAttr(llvm::Attribute::AttrKind::ReadNone); - intrin_impl->addFnAttr(llvm::Attribute::AttrKind::NoUnwind); + intrin_impl->addFnAttr(llvm::Attribute::ReadNone); + intrin_impl->addFnAttr(llvm::Attribute::NoUnwind); declare_intrin_overload(intrin.name, ret_type, intrin_impl, arg_types); if (intrin.flags & ArmIntrinsic::AllowUnsignedOp1) { // Also generate a version of this intrinsic where the second operand is unsigned. diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index 4e1075d3b68a..02bd70d741a9 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -248,8 +248,8 @@ void CodeGen_PTX_Dev::init_module() { for (auto &&i : ptx_intrins) { auto fn = declare_intrin_overload(i.name, i.ret_type, i.intrin_name, std::move(i.arg_types)); - fn->addFnAttr(llvm::Attribute::AttrKind::ReadNone); - fn->addFnAttr(llvm::Attribute::AttrKind::NoUnwind); + fn->addFnAttr(llvm::Attribute::ReadNone); + fn->addFnAttr(llvm::Attribute::NoUnwind); } } diff --git a/src/CodeGen_PowerPC.cpp b/src/CodeGen_PowerPC.cpp index b2f782936627..44be32603151 100644 --- a/src/CodeGen_PowerPC.cpp +++ b/src/CodeGen_PowerPC.cpp @@ -1,5 +1,7 @@ #include "CodeGen_Posix.h" +#include "LLVM_Headers.h" + namespace Halide { namespace Internal { @@ -114,8 +116,8 @@ void CodeGen_PowerPC::init_module() { } auto fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); - fn->addFnAttr(llvm::Attribute::AttrKind::ReadNone); - fn->addFnAttr(llvm::Attribute::AttrKind::NoUnwind); + fn->addFnAttr(llvm::Attribute::ReadNone); + fn->addFnAttr(llvm::Attribute::NoUnwind); } } diff --git a/src/CodeGen_WebAssembly.cpp b/src/CodeGen_WebAssembly.cpp index 39a82cb545be..3ee101a5f211 100644 --- a/src/CodeGen_WebAssembly.cpp +++ b/src/CodeGen_WebAssembly.cpp @@ -88,8 +88,8 @@ void CodeGen_WebAssembly::init_module() { } auto fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); - fn->addFnAttr(llvm::Attribute::AttrKind::ReadNone); - fn->addFnAttr(llvm::Attribute::AttrKind::NoUnwind); + fn->addFnAttr(llvm::Attribute::ReadNone); + fn->addFnAttr(llvm::Attribute::NoUnwind); } } diff --git a/src/CodeGen_X86.cpp b/src/CodeGen_X86.cpp index 22ea1adb6e56..e2905e028401 100644 --- a/src/CodeGen_X86.cpp +++ b/src/CodeGen_X86.cpp @@ -206,8 +206,8 @@ void CodeGen_X86::init_module() { } auto fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); - fn->addFnAttr(llvm::Attribute::AttrKind::ReadNone); - fn->addFnAttr(llvm::Attribute::AttrKind::NoUnwind); + fn->addFnAttr(llvm::Attribute::ReadNone); + fn->addFnAttr(llvm::Attribute::NoUnwind); } } From 7fdcf4bea09bbfbbe42a49d96a2db91437ac5b81 Mon Sep 17 00:00:00 2001 From: John Lawson Date: Tue, 9 Mar 2021 11:22:35 +0000 Subject: [PATCH 4/8] Set moved attributes in Hexagon backend --- src/CodeGen_Hexagon.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/CodeGen_Hexagon.cpp b/src/CodeGen_Hexagon.cpp index 1f033fdcc33c..91d13b13596d 100644 --- a/src/CodeGen_Hexagon.cpp +++ b/src/CodeGen_Hexagon.cpp @@ -89,7 +89,6 @@ class CodeGen_Hexagon : public CodeGen_Posix { * return null if the maybe option is true and the intrinsic is * not found. */ ///@{ - using CodeGen_LLVM::call_intrin; llvm::Value *call_intrin(Type t, const std::string &name, std::vector, bool maybe = false); llvm::Value *call_intrin(llvm::Type *t, const std::string &name, @@ -1791,8 +1790,10 @@ Value *CodeGen_Hexagon::call_intrin(Type result_type, const string &name, fn = fn2; } } - return call_intrin(result_type, get_vector_num_elements(fn->getReturnType()), - get_llvm_function_name(fn), std::move(args)); + fn->addFnAttr(llvm::Attribute::ReadNone); + fn->addFnAttr(llvm::Attribute::NoUnwind); + return CodeGen_Posix::call_intrin(result_type, get_vector_num_elements(fn->getReturnType()), + fn, std::move(args)); } Value *CodeGen_Hexagon::call_intrin(llvm::Type *result_type, const string &name, @@ -1812,8 +1813,10 @@ Value *CodeGen_Hexagon::call_intrin(llvm::Type *result_type, const string &name, fn = fn2; } } - return call_intrin(result_type, get_vector_num_elements(fn->getReturnType()), - get_llvm_function_name(fn), std::move(args)); + fn->addFnAttr(llvm::Attribute::ReadNone); + fn->addFnAttr(llvm::Attribute::NoUnwind); + return CodeGen_Posix::call_intrin(result_type, get_vector_num_elements(fn->getReturnType()), + fn, std::move(args)); } string CodeGen_Hexagon::mcpu() const { From d1e9a1c47ec495a2c00628ec1757b2af91b13e30 Mon Sep 17 00:00:00 2001 From: John Lawson Date: Mon, 8 Mar 2021 15:06:34 +0000 Subject: [PATCH 5/8] Use declare_intrin_overload for ARM vabdl --- src/CodeGen_ARM.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/CodeGen_ARM.cpp b/src/CodeGen_ARM.cpp index c79301161b93..b9d5746966eb 100644 --- a/src/CodeGen_ARM.cpp +++ b/src/CodeGen_ARM.cpp @@ -218,6 +218,7 @@ struct ArmIntrinsic { MangleRetArgs = 1 << 4, // Most intrinsics only mangle the return type. Some mangle the return type and arguments instead. ScalarsAreVectors = 1 << 5, // Some intrinsics have scalar arguments that are vector parameters :( SplitArg0 = 1 << 6, // This intrinsic requires splitting the argument into the low and high halves. + NoPrefix = 1 << 7, // Don't prefix the intrinsic with llvm.* }; }; @@ -546,6 +547,18 @@ const ArmIntrinsic intrinsic_defs[] = { {nullptr, "sdot.v4i32.v16i8", Int(32, 4), "dot_product", {Int(32, 4), Int(8, 16), Int(8, 16)}, ArmIntrinsic::NoMangle}, {nullptr, "udot.v4i32.v16i8", Int(32, 4), "dot_product", {Int(32, 4), UInt(8, 16), UInt(8, 16)}, ArmIntrinsic::NoMangle}, {nullptr, "udot.v4i32.v16i8", UInt(32, 4), "dot_product", {UInt(32, 4), UInt(8, 16), UInt(8, 16)}, ArmIntrinsic::NoMangle}, + + // ABDL - Widening absolute difference + // Need to be able to handle both signed and unsigned outputs for signed inputs. + {"vabdl_i8x8" , "vabdl_i8x8" , Int(16, 8), "vabdl", { Int( 8, 8), Int( 8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i8x8" , "vabdl_i8x8" , UInt(16, 8), "vabdl", { Int( 8, 8), Int( 8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_u8x8" , "vabdl_u8x8" , UInt(16, 8), "vabdl", {UInt( 8, 8), UInt( 8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i16x4", "vabdl_i16x4", Int(32, 4), "vabdl", { Int(16, 8), Int(16, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i16x4", "vabdl_i16x4", UInt(32, 4), "vabdl", { Int(16, 8), Int(16, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_u16x4", "vabdl_u16x4", UInt(32, 4), "vabdl", {UInt(16, 8), UInt(16, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i32x2", "vabdl_i32x2", Int(64, 2), "vabdl", { Int(32, 8), Int(32, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i32x2", "vabdl_i32x2", UInt(64, 2), "vabdl", { Int(32, 8), Int(32, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_u32x2", "vabdl_u32x2", UInt(64, 2), "vabdl", {UInt(32, 8), UInt(32, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, }; // clang-format on @@ -606,7 +619,7 @@ void CodeGen_ARM::init_module() { continue; } string full_name = intrin_name; - if (!starts_with(full_name, "llvm.")) { + if (!starts_with(full_name, "llvm.") && (intrin.flags & ArmIntrinsic::NoPrefix) == 0) { full_name = prefix + full_name; } @@ -713,10 +726,7 @@ void CodeGen_ARM::visit(const Cast *op) { (op->value.type().is_int() || op->value.type().is_uint()) && t.bits() == op->value.type().bits() * 2) { if (const Call *absd = Call::as_intrinsic(op->value, {Call::absd})) { - ostringstream ss; - int intrin_lanes = 128 / t.bits(); - ss << "vabdl_" << (absd->args[0].type().is_int() ? "i" : "u") << t.bits() / 2 << "x" << intrin_lanes; - value = call_intrin(t, intrin_lanes, ss.str(), absd->args); + value = call_overloaded_intrin(t, "vabdl", absd->args); return; } } From 64f0f73c484b07e4dbacc202473c4117e35839dc Mon Sep 17 00:00:00 2001 From: John Lawson Date: Tue, 9 Mar 2021 12:32:27 +0000 Subject: [PATCH 6/8] Fix ARM vabdl intrinsic types --- src/CodeGen_ARM.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/CodeGen_ARM.cpp b/src/CodeGen_ARM.cpp index b9d5746966eb..6090d5f6787a 100644 --- a/src/CodeGen_ARM.cpp +++ b/src/CodeGen_ARM.cpp @@ -550,15 +550,15 @@ const ArmIntrinsic intrinsic_defs[] = { // ABDL - Widening absolute difference // Need to be able to handle both signed and unsigned outputs for signed inputs. - {"vabdl_i8x8" , "vabdl_i8x8" , Int(16, 8), "vabdl", { Int( 8, 8), Int( 8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_i8x8" , "vabdl_i8x8" , UInt(16, 8), "vabdl", { Int( 8, 8), Int( 8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_u8x8" , "vabdl_u8x8" , UInt(16, 8), "vabdl", {UInt( 8, 8), UInt( 8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_i16x4", "vabdl_i16x4", Int(32, 4), "vabdl", { Int(16, 8), Int(16, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_i16x4", "vabdl_i16x4", UInt(32, 4), "vabdl", { Int(16, 8), Int(16, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_u16x4", "vabdl_u16x4", UInt(32, 4), "vabdl", {UInt(16, 8), UInt(16, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_i32x2", "vabdl_i32x2", Int(64, 2), "vabdl", { Int(32, 8), Int(32, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_i32x2", "vabdl_i32x2", UInt(64, 2), "vabdl", { Int(32, 8), Int(32, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_u32x2", "vabdl_u32x2", UInt(64, 2), "vabdl", {UInt(32, 8), UInt(32, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i8x8", "vabdl_i8x8", Int(16, 8), "vabdl", {Int(8, 8), Int(8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i8x8", "vabdl_i8x8", UInt(16, 8), "vabdl", {Int(8, 8), Int(8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_u8x8", "vabdl_u8x8", UInt(16, 8), "vabdl", {UInt(8, 8), UInt(8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i16x4", "vabdl_i16x4", Int(32, 4), "vabdl", {Int(16, 4), Int(16, 4)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i16x4", "vabdl_i16x4", UInt(32, 4), "vabdl", {Int(16, 4), Int(16, 4)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_u16x4", "vabdl_u16x4", UInt(32, 4), "vabdl", {UInt(16, 4), UInt(16, 4)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i32x2", "vabdl_i32x2", Int(64, 2), "vabdl", {Int(32, 2), Int(32, 2)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i32x2", "vabdl_i32x2", UInt(64, 2), "vabdl", {Int(32, 2), Int(32, 2)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_u32x2", "vabdl_u32x2", UInt(64, 2), "vabdl", {UInt(32, 2), UInt(32, 2)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, }; // clang-format on From 89cfc7e720ac6e3e389910cc7c50f1387dbb1a5b Mon Sep 17 00:00:00 2001 From: John Lawson Date: Tue, 9 Mar 2021 13:08:31 +0000 Subject: [PATCH 7/8] Format and clang-tidy --- src/CodeGen_LLVM.cpp | 2 +- src/CodeGen_LLVM.h | 2 +- src/CodeGen_PTX_Dev.cpp | 2 +- src/CodeGen_PowerPC.cpp | 2 +- src/CodeGen_WebAssembly.cpp | 3 ++- src/CodeGen_X86.cpp | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp index c15166bd00d8..7da41a7211fa 100644 --- a/src/CodeGen_LLVM.cpp +++ b/src/CodeGen_LLVM.cpp @@ -4661,7 +4661,7 @@ llvm::Function *CodeGen_LLVM::get_llvm_intrin(const Type &ret_type, const std::s return get_llvm_intrin(llvm_ret_type, name, llvm_arg_types); } -llvm::Function* CodeGen_LLVM::declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector arg_types, bool scalars_are_vectors) { +llvm::Function *CodeGen_LLVM::declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector arg_types, bool scalars_are_vectors) { llvm::Function *intrin = get_llvm_intrin(ret_type, impl_name, arg_types, scalars_are_vectors); internal_assert(intrin); intrinsics[name].emplace_back(ret_type, std::move(arg_types), intrin); diff --git a/src/CodeGen_LLVM.h b/src/CodeGen_LLVM.h index 09f2c68b8d6e..54a9f393fd8e 100644 --- a/src/CodeGen_LLVM.h +++ b/src/CodeGen_LLVM.h @@ -446,7 +446,7 @@ class CodeGen_LLVM : public IRVisitor { llvm::Function *get_llvm_intrin(const Type &ret_type, const std::string &name, const std::vector &arg_types, bool scalars_are_vectors = false); llvm::Function *get_llvm_intrin(llvm::Type *ret_type, const std::string &name, const std::vector &arg_types); /** Declare an intrinsic function that participates in overload resolution. */ - llvm::Function* declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector arg_types, bool scalars_are_vectors = false); + llvm::Function *declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector arg_types, bool scalars_are_vectors = false); void declare_intrin_overload(const std::string &name, const Type &ret_type, llvm::Function *impl, std::vector arg_types); /** Call an overloaded intrinsic function. Returns nullptr if no suitable overload is found. */ llvm::Value *call_overloaded_intrin(const Type &result_type, const std::string &name, const std::vector &args); diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index 02bd70d741a9..3b354e51e342 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -247,7 +247,7 @@ void CodeGen_PTX_Dev::init_module() { }; for (auto &&i : ptx_intrins) { - auto fn = declare_intrin_overload(i.name, i.ret_type, i.intrin_name, std::move(i.arg_types)); + auto *fn = declare_intrin_overload(i.name, i.ret_type, i.intrin_name, std::move(i.arg_types)); fn->addFnAttr(llvm::Attribute::ReadNone); fn->addFnAttr(llvm::Attribute::NoUnwind); } diff --git a/src/CodeGen_PowerPC.cpp b/src/CodeGen_PowerPC.cpp index 44be32603151..42dec77fd75d 100644 --- a/src/CodeGen_PowerPC.cpp +++ b/src/CodeGen_PowerPC.cpp @@ -115,7 +115,7 @@ void CodeGen_PowerPC::init_module() { arg_types.emplace_back(j); } - auto fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); + auto *fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); fn->addFnAttr(llvm::Attribute::ReadNone); fn->addFnAttr(llvm::Attribute::NoUnwind); } diff --git a/src/CodeGen_WebAssembly.cpp b/src/CodeGen_WebAssembly.cpp index 3ee101a5f211..93183780ccf8 100644 --- a/src/CodeGen_WebAssembly.cpp +++ b/src/CodeGen_WebAssembly.cpp @@ -1,5 +1,6 @@ #include "CodeGen_Posix.h" +#include "LLVM_Headers.h" #include namespace Halide { @@ -87,7 +88,7 @@ void CodeGen_WebAssembly::init_module() { arg_types.emplace_back(i); } - auto fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); + auto *fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); fn->addFnAttr(llvm::Attribute::ReadNone); fn->addFnAttr(llvm::Attribute::NoUnwind); } diff --git a/src/CodeGen_X86.cpp b/src/CodeGen_X86.cpp index e2905e028401..2038dcce75c8 100644 --- a/src/CodeGen_X86.cpp +++ b/src/CodeGen_X86.cpp @@ -205,7 +205,7 @@ void CodeGen_X86::init_module() { arg_types.emplace_back(j); } - auto fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); + auto *fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types)); fn->addFnAttr(llvm::Attribute::ReadNone); fn->addFnAttr(llvm::Attribute::NoUnwind); } From 6d08157da89790a13821b51f2ef80a36c18a4d08 Mon Sep 17 00:00:00 2001 From: John Lawson Date: Wed, 10 Mar 2021 14:23:16 +0000 Subject: [PATCH 8/8] Rename intrinsic to widening_absd --- src/CodeGen_ARM.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/CodeGen_ARM.cpp b/src/CodeGen_ARM.cpp index 6090d5f6787a..bc25b2df2715 100644 --- a/src/CodeGen_ARM.cpp +++ b/src/CodeGen_ARM.cpp @@ -550,15 +550,15 @@ const ArmIntrinsic intrinsic_defs[] = { // ABDL - Widening absolute difference // Need to be able to handle both signed and unsigned outputs for signed inputs. - {"vabdl_i8x8", "vabdl_i8x8", Int(16, 8), "vabdl", {Int(8, 8), Int(8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_i8x8", "vabdl_i8x8", UInt(16, 8), "vabdl", {Int(8, 8), Int(8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_u8x8", "vabdl_u8x8", UInt(16, 8), "vabdl", {UInt(8, 8), UInt(8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_i16x4", "vabdl_i16x4", Int(32, 4), "vabdl", {Int(16, 4), Int(16, 4)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_i16x4", "vabdl_i16x4", UInt(32, 4), "vabdl", {Int(16, 4), Int(16, 4)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_u16x4", "vabdl_u16x4", UInt(32, 4), "vabdl", {UInt(16, 4), UInt(16, 4)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_i32x2", "vabdl_i32x2", Int(64, 2), "vabdl", {Int(32, 2), Int(32, 2)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_i32x2", "vabdl_i32x2", UInt(64, 2), "vabdl", {Int(32, 2), Int(32, 2)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, - {"vabdl_u32x2", "vabdl_u32x2", UInt(64, 2), "vabdl", {UInt(32, 2), UInt(32, 2)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i8x8", "vabdl_i8x8", Int(16, 8), "widening_absd", {Int(8, 8), Int(8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i8x8", "vabdl_i8x8", UInt(16, 8), "widening_absd", {Int(8, 8), Int(8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_u8x8", "vabdl_u8x8", UInt(16, 8), "widening_absd", {UInt(8, 8), UInt(8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i16x4", "vabdl_i16x4", Int(32, 4), "widening_absd", {Int(16, 4), Int(16, 4)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i16x4", "vabdl_i16x4", UInt(32, 4), "widening_absd", {Int(16, 4), Int(16, 4)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_u16x4", "vabdl_u16x4", UInt(32, 4), "widening_absd", {UInt(16, 4), UInt(16, 4)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i32x2", "vabdl_i32x2", Int(64, 2), "widening_absd", {Int(32, 2), Int(32, 2)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_i32x2", "vabdl_i32x2", UInt(64, 2), "widening_absd", {Int(32, 2), Int(32, 2)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, + {"vabdl_u32x2", "vabdl_u32x2", UInt(64, 2), "widening_absd", {UInt(32, 2), UInt(32, 2)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix}, }; // clang-format on @@ -726,7 +726,7 @@ void CodeGen_ARM::visit(const Cast *op) { (op->value.type().is_int() || op->value.type().is_uint()) && t.bits() == op->value.type().bits() * 2) { if (const Call *absd = Call::as_intrinsic(op->value, {Call::absd})) { - value = call_overloaded_intrin(t, "vabdl", absd->args); + value = call_overloaded_intrin(t, "widening_absd", absd->args); return; } }