Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions src/CodeGen_ARM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ struct ArmIntrinsic {
MangleRetArgs = 1 << 4, // Most intrinsics only mangle the return type. Some mangle the return type and arguments instead.
ScalarsAreVectors = 1 << 5, // Some intrinsics have scalar arguments that are vector parameters :(
SplitArg0 = 1 << 6, // This intrinsic requires splitting the argument into the low and high halves.
NoPrefix = 1 << 7, // Don't prefix the intrinsic with llvm.*
};
};

Expand Down Expand Up @@ -546,6 +547,18 @@ const ArmIntrinsic intrinsic_defs[] = {
{nullptr, "sdot.v4i32.v16i8", Int(32, 4), "dot_product", {Int(32, 4), Int(8, 16), Int(8, 16)}, ArmIntrinsic::NoMangle},
{nullptr, "udot.v4i32.v16i8", Int(32, 4), "dot_product", {Int(32, 4), UInt(8, 16), UInt(8, 16)}, ArmIntrinsic::NoMangle},
{nullptr, "udot.v4i32.v16i8", UInt(32, 4), "dot_product", {UInt(32, 4), UInt(8, 16), UInt(8, 16)}, ArmIntrinsic::NoMangle},

// ABDL - Widening absolute difference
// Need to be able to handle both signed and unsigned outputs for signed inputs.
{"vabdl_i8x8", "vabdl_i8x8", Int(16, 8), "widening_absd", {Int(8, 8), Int(8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix},
{"vabdl_i8x8", "vabdl_i8x8", UInt(16, 8), "widening_absd", {Int(8, 8), Int(8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix},
{"vabdl_u8x8", "vabdl_u8x8", UInt(16, 8), "widening_absd", {UInt(8, 8), UInt(8, 8)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix},
{"vabdl_i16x4", "vabdl_i16x4", Int(32, 4), "widening_absd", {Int(16, 4), Int(16, 4)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix},
{"vabdl_i16x4", "vabdl_i16x4", UInt(32, 4), "widening_absd", {Int(16, 4), Int(16, 4)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix},
{"vabdl_u16x4", "vabdl_u16x4", UInt(32, 4), "widening_absd", {UInt(16, 4), UInt(16, 4)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix},
{"vabdl_i32x2", "vabdl_i32x2", Int(64, 2), "widening_absd", {Int(32, 2), Int(32, 2)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix},
{"vabdl_i32x2", "vabdl_i32x2", UInt(64, 2), "widening_absd", {Int(32, 2), Int(32, 2)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix},
{"vabdl_u32x2", "vabdl_u32x2", UInt(64, 2), "widening_absd", {UInt(32, 2), UInt(32, 2)}, ArmIntrinsic::NoMangle | ArmIntrinsic::NoPrefix},
};
// clang-format on

Expand Down Expand Up @@ -606,7 +619,7 @@ void CodeGen_ARM::init_module() {
continue;
}
string full_name = intrin_name;
if (!starts_with(full_name, "llvm.")) {
if (!starts_with(full_name, "llvm.") && (intrin.flags & ArmIntrinsic::NoPrefix) == 0) {
full_name = prefix + full_name;
}

Expand Down Expand Up @@ -671,6 +684,8 @@ void CodeGen_ARM::init_module() {
intrin_impl = get_llvm_intrin(ret_type, mangled_name, arg_types, scalars_are_vectors);
}

intrin_impl->addFnAttr(llvm::Attribute::ReadNone);
intrin_impl->addFnAttr(llvm::Attribute::NoUnwind);
declare_intrin_overload(intrin.name, ret_type, intrin_impl, arg_types);
if (intrin.flags & ArmIntrinsic::AllowUnsignedOp1) {
// Also generate a version of this intrinsic where the second operand is unsigned.
Expand Down Expand Up @@ -711,10 +726,7 @@ void CodeGen_ARM::visit(const Cast *op) {
(op->value.type().is_int() || op->value.type().is_uint()) &&
t.bits() == op->value.type().bits() * 2) {
if (const Call *absd = Call::as_intrinsic(op->value, {Call::absd})) {
ostringstream ss;
int intrin_lanes = 128 / t.bits();
ss << "vabdl_" << (absd->args[0].type().is_int() ? "i" : "u") << t.bits() / 2 << "x" << intrin_lanes;
value = call_intrin(t, intrin_lanes, ss.str(), absd->args);
value = call_overloaded_intrin(t, "widening_absd", absd->args);
return;
}
}
Expand Down
13 changes: 8 additions & 5 deletions src/CodeGen_Hexagon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ class CodeGen_Hexagon : public CodeGen_Posix {
* return null if the maybe option is true and the intrinsic is
* not found. */
///@{
using CodeGen_LLVM::call_intrin;
llvm::Value *call_intrin(Type t, const std::string &name,
std::vector<Expr>, bool maybe = false);
llvm::Value *call_intrin(llvm::Type *t, const std::string &name,
Expand Down Expand Up @@ -1791,8 +1790,10 @@ Value *CodeGen_Hexagon::call_intrin(Type result_type, const string &name,
fn = fn2;
}
}
return call_intrin(result_type, get_vector_num_elements(fn->getReturnType()),
get_llvm_function_name(fn), std::move(args));
fn->addFnAttr(llvm::Attribute::ReadNone);
fn->addFnAttr(llvm::Attribute::NoUnwind);
return CodeGen_Posix::call_intrin(result_type, get_vector_num_elements(fn->getReturnType()),
fn, std::move(args));
}

Value *CodeGen_Hexagon::call_intrin(llvm::Type *result_type, const string &name,
Expand All @@ -1812,8 +1813,10 @@ Value *CodeGen_Hexagon::call_intrin(llvm::Type *result_type, const string &name,
fn = fn2;
}
}
return call_intrin(result_type, get_vector_num_elements(fn->getReturnType()),
get_llvm_function_name(fn), std::move(args));
fn->addFnAttr(llvm::Attribute::ReadNone);
fn->addFnAttr(llvm::Attribute::NoUnwind);
return CodeGen_Posix::call_intrin(result_type, get_vector_num_elements(fn->getReturnType()),
fn, std::move(args));
}

string CodeGen_Hexagon::mcpu() const {
Expand Down
7 changes: 2 additions & 5 deletions src/CodeGen_LLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4661,10 +4661,11 @@ llvm::Function *CodeGen_LLVM::get_llvm_intrin(const Type &ret_type, const std::s
return get_llvm_intrin(llvm_ret_type, name, llvm_arg_types);
}

void CodeGen_LLVM::declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector<Type> arg_types, bool scalars_are_vectors) {
// Register the function called impl_name as one overload of the intrinsic
// `name`, with the given return and argument types. The implementation is
// obtained from get_llvm_intrin and asserted to be non-null. It is returned
// so that callers can keep working with it (for example, to add function
// attributes).
llvm::Function *CodeGen_LLVM::declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector<Type> arg_types, bool scalars_are_vectors) {
    // Find the implementation for this exact signature.
    llvm::Function *impl = get_llvm_intrin(ret_type, impl_name, arg_types, scalars_are_vectors);
    internal_assert(impl);

    // Append it to the list of candidates stored under the overloaded name.
    auto &overloads = intrinsics[name];
    overloads.emplace_back(ret_type, std::move(arg_types), impl);

    return impl;
}

void CodeGen_LLVM::declare_intrin_overload(const std::string &name, const Type &ret_type, llvm::Function *impl, std::vector<Type> arg_types) {
Expand Down Expand Up @@ -4893,10 +4894,6 @@ Value *CodeGen_LLVM::call_intrin(llvm::Type *result_type, int intrin_lanes,
}

CallInst *call = builder->CreateCall(intrin, arg_values);

call->setDoesNotAccessMemory();
call->setDoesNotThrow();

return call;
}

Expand Down
2 changes: 1 addition & 1 deletion src/CodeGen_LLVM.h
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ class CodeGen_LLVM : public IRVisitor {
llvm::Function *get_llvm_intrin(const Type &ret_type, const std::string &name, const std::vector<Type> &arg_types, bool scalars_are_vectors = false);
llvm::Function *get_llvm_intrin(llvm::Type *ret_type, const std::string &name, const std::vector<llvm::Type *> &arg_types);
/** Declare an intrinsic function that participates in overload resolution.
 * `name` is the overloaded name the implementation is registered under;
 * `ret_type` and `arg_types` describe the signature of this overload.
 *
 * The variant taking `impl_name` obtains the implementation through
 * get_llvm_intrin (asserting that it is found) and, where it returns
 * llvm::Function *, hands that function back so the caller can, for
 * example, attach function attributes to it. */
void declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector<Type> arg_types, bool scalars_are_vectors = false);
llvm::Function *declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector<Type> arg_types, bool scalars_are_vectors = false);
// This variant takes the implementing function directly instead of its name.
void declare_intrin_overload(const std::string &name, const Type &ret_type, llvm::Function *impl, std::vector<Type> arg_types);
/** Call an overloaded intrinsic function. Returns nullptr if no suitable overload is found. */
llvm::Value *call_overloaded_intrin(const Type &result_type, const std::string &name, const std::vector<Expr> &args);
Expand Down
31 changes: 23 additions & 8 deletions src/CodeGen_PTX_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,29 @@ void CodeGen_PTX_Dev::init_module() {

module = get_initial_module_for_ptx_device(target, context);

declare_intrin_overload("dp4a", Int(32), "dp4a_s32_s32", {Int(8, 4), Int(8, 4), Int(32)});
declare_intrin_overload("dp4a", Int(32), "dp4a_s32_u32", {Int(8, 4), UInt(8, 4), Int(32)});
declare_intrin_overload("dp4a", Int(32), "dp4a_u32_s32", {UInt(8, 4), Int(8, 4), Int(32)});
declare_intrin_overload("dp4a", UInt(32), "dp4a_u32_u32", {UInt(8, 4), UInt(8, 4), UInt(32)});
declare_intrin_overload("dp2a", Int(32), "dp2a_s32_s32", {Int(16, 4), Int(8, 4), Int(32)});
declare_intrin_overload("dp2a", Int(32), "dp2a_s32_u32", {Int(16, 4), UInt(8, 4), Int(32)});
declare_intrin_overload("dp2a", Int(32), "dp2a_u32_s32", {UInt(16, 4), Int(8, 4), Int(32)});
declare_intrin_overload("dp2a", UInt(32), "dp2a_u32_u32", {UInt(16, 4), UInt(8, 4), UInt(32)});
// One row of the table below: the arguments for a single
// declare_intrin_overload() call.
struct Intrinsic {
    const char *name;         // overloaded name the row is registered under
    Type ret_type;            // return type of this overload
    const char *intrin_name;  // name of the function implementing the overload
    vector<Type> arg_types;   // argument types of this overload
};

// Deliberately not const: arg_types is moved out of each row by the loop below.
Intrinsic ptx_intrins[] = {
    {"dp4a", Int(32), "dp4a_s32_s32", {Int(8, 4), Int(8, 4), Int(32)}},
    {"dp4a", Int(32), "dp4a_s32_u32", {Int(8, 4), UInt(8, 4), Int(32)}},
    {"dp4a", Int(32), "dp4a_u32_s32", {UInt(8, 4), Int(8, 4), Int(32)}},
    {"dp4a", UInt(32), "dp4a_u32_u32", {UInt(8, 4), UInt(8, 4), UInt(32)}},
    {"dp2a", Int(32), "dp2a_s32_s32", {Int(16, 4), Int(8, 4), Int(32)}},
    {"dp2a", Int(32), "dp2a_s32_u32", {Int(16, 4), UInt(8, 4), Int(32)}},
    {"dp2a", Int(32), "dp2a_u32_s32", {UInt(16, 4), Int(8, 4), Int(32)}},
    {"dp2a", UInt(32), "dp2a_u32_u32", {UInt(16, 4), UInt(8, 4), UInt(32)}},
};

// Register every row, then tag the returned implementation with the
// ReadNone and NoUnwind function attributes.
for (Intrinsic &entry : ptx_intrins) {
    auto *impl = declare_intrin_overload(entry.name, entry.ret_type, entry.intrin_name, std::move(entry.arg_types));
    impl->addFnAttr(llvm::Attribute::ReadNone);
    impl->addFnAttr(llvm::Attribute::NoUnwind);
}
}

void CodeGen_PTX_Dev::visit(const Call *op) {
Expand Down
6 changes: 5 additions & 1 deletion src/CodeGen_PowerPC.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include "CodeGen_Posix.h"

#include "LLVM_Headers.h"

namespace Halide {
namespace Internal {

Expand Down Expand Up @@ -113,7 +115,9 @@ void CodeGen_PowerPC::init_module() {
arg_types.emplace_back(j);
}

declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types));
auto *fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types));
fn->addFnAttr(llvm::Attribute::ReadNone);
fn->addFnAttr(llvm::Attribute::NoUnwind);
}
}

Expand Down
5 changes: 4 additions & 1 deletion src/CodeGen_WebAssembly.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "CodeGen_Posix.h"

#include "LLVM_Headers.h"
#include <sstream>

namespace Halide {
Expand Down Expand Up @@ -87,7 +88,9 @@ void CodeGen_WebAssembly::init_module() {
arg_types.emplace_back(i);
}

declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types));
auto *fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types));
fn->addFnAttr(llvm::Attribute::ReadNone);
fn->addFnAttr(llvm::Attribute::NoUnwind);
}
}

Expand Down
4 changes: 3 additions & 1 deletion src/CodeGen_X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,9 @@ void CodeGen_X86::init_module() {
arg_types.emplace_back(j);
}

declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types));
auto *fn = declare_intrin_overload(i.name, ret_type, i.intrin_name, std::move(arg_types));
fn->addFnAttr(llvm::Attribute::ReadNone);
fn->addFnAttr(llvm::Attribute::NoUnwind);
}
}

Expand Down