Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 121 additions & 20 deletions src/CodeGen_Hexagon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "AlignLoads.h"
#include "CSE.h"
#include "CodeGen_Internal.h"
#include "CodeGen_Posix.h"
#include "Debug.h"
#include "HexagonOptimize.h"
#include "IREquality.h"
Expand All @@ -32,11 +33,111 @@ using std::vector;

using namespace llvm;

#ifdef WITH_HEXAGON

namespace {

/** A code generator that emits Hexagon code from a given Halide stmt.
 * Builds on CodeGen_Posix and overrides selected hooks and IR visitors
 * so that Hexagon/HVX-specific intrinsics can be emitted. */
class CodeGen_Hexagon : public CodeGen_Posix {
public:
    /** Create a Hexagon code generator for the given Hexagon target. */
    CodeGen_Hexagon(Target);

protected:
    /** Hexagon-specific override of the inherited compile_func hook. */
    void compile_func(const LoweredFunc &f,
                      const std::string &simple_name, const std::string &extern_name) override;

    /** Hexagon-specific override of the inherited init_module hook. */
    void init_module() override;

    std::string mcpu() const override;
    std::string mattrs() const override;

    /** ISA version number; compared against 65 by is_hvx_v65_or_later().
     * NOTE(review): presumably assigned by the constructor from the
     * target's HVX feature flags -- confirm it is set on every path,
     * since it has no default initializer here. */
    int isa_version;

    bool use_soft_float_abi() const override;
    int native_vector_bits() const override;

    /** Define a wrapper function named `name`, with Halide return type
     * `ret_ty` and argument types `arg_types`, around the LLVM function
     * `intrin`. NOTE(review): `flags` presumably takes HvxIntrinsic
     * enum bits -- confirm against the definition. */
    llvm::Function *define_hvx_intrinsic(llvm::Function *intrin, Type ret_ty,
                                         const std::string &name,
                                         std::vector<Type> arg_types,
                                         int flags);

    /** True when the recorded ISA version is 65 or newer. */
    bool is_hvx_v65_or_later() const {
        return (isa_version >= 65);
    }

    using CodeGen_Posix::visit;

    /** Nodes for which we want to emit specific hexagon intrinsics */
    ///@{
    void visit(const Max *) override;
    void visit(const Min *) override;
    void visit(const Call *) override;
    void visit(const Mul *) override;
    void visit(const Select *) override;
    void visit(const Allocate *) override;
    ///@}

    /** We ask for an extra vector on each allocation to enable fast
     * clamped ramp loads. The extra amount is one native vector,
     * expressed in bytes (native_vector_bits() / 8), on top of
     * whatever CodeGen_Posix already requests. */
    int allocation_padding(Type type) const override {
        return CodeGen_Posix::allocation_padding(type) + native_vector_bits() / 8;
    }

    /** Call an LLVM intrinsic, potentially casting the operands to
     * match the type of the function. The intrinsic is given either
     * as an llvm::Function or as an integer id. */
    ///@{
    llvm::Value *call_intrin_cast(llvm::Type *ret_ty, llvm::Function *F,
                                  std::vector<llvm::Value *> Ops);
    llvm::Value *call_intrin_cast(llvm::Type *ret_ty, int id,
                                  std::vector<llvm::Value *> Ops);
    ///@}

    /** Define overloads of CodeGen_LLVM::call_intrin that determine
     * the intrin_lanes from the type, and allow the function to
     * return null if the maybe option is true and the intrinsic is
     * not found. */
    ///@{
    using CodeGen_LLVM::call_intrin;
    llvm::Value *call_intrin(Type t, const std::string &name,
                             std::vector<Expr>, bool maybe = false);
    llvm::Value *call_intrin(llvm::Type *t, const std::string &name,
                             std::vector<llvm::Value *>, bool maybe = false);
    ///@}

    /** Override CodeGen_LLVM to use hexagon intrinsics when possible. */
    ///@{
    llvm::Value *interleave_vectors(const std::vector<llvm::Value *> &v) override;
    llvm::Value *shuffle_vectors(llvm::Value *a, llvm::Value *b,
                                 const std::vector<int> &indices) override;
    using CodeGen_Posix::shuffle_vectors;
    ///@}

    /** Generate a LUT lookup using vlut instructions. The indices may
     * be a runtime value (optionally with known bounds) or a
     * compile-time list. */
    ///@{
    llvm::Value *vlut(llvm::Value *lut, llvm::Value *indices, int min_index = 0, int max_index = 1 << 30);
    llvm::Value *vlut(llvm::Value *lut, const std::vector<int> &indices);
    ///@}

    /** Lookup of `lut` at the compile-time `indices`.
     * NOTE(review): presumably uses vdelta instructions where
     * possible; the visible tail of the definition falls back to
     * vlut(lut, indices) -- confirm. */
    llvm::Value *vdelta(llvm::Value *lut, const std::vector<int> &indices);

    /** Because HVX intrinsics operate on vectors of i32, using them
     * requires a lot of extraneous bitcasts, which make it difficult
     * to manipulate the IR. This function avoids generating redundant
     * bitcasts. */
    llvm::Value *create_bitcast(llvm::Value *v, llvm::Type *ty);

private:
    /** Generates code for computing the size of an allocation from a
     * list of its extents and its element type. Fires a runtime assert
     * (halide_error) if the size overflows 2^31 - 1, the maximum
     * positive number an int32_t can hold. */
    llvm::Value *codegen_cache_allocation_size(const std::string &name, Type type, const std::vector<Expr> &extents);

    /** Generate a LUT (8/16 bit, max_index < 256) lookup using vlut instructions. */
    llvm::Value *vlut256(llvm::Value *lut, llvm::Value *indices, int min_index = 0, int max_index = 255);
};

CodeGen_Hexagon::CodeGen_Hexagon(Target t)
: CodeGen_Posix(t) {
#if !defined(WITH_HEXAGON)
user_error << "hexagon not enabled for this build of Halide.\n";
#endif
user_assert(llvm_Hexagon_enabled)
<< "llvm build not configured with Hexagon target enabled.\n";
if (target.has_feature(Halide::Target::HVX_v66)) {
Expand All @@ -50,8 +151,6 @@ CodeGen_Hexagon::CodeGen_Hexagon(Target t)
<< "Creating a Codegen target for Hexagon without the hvx target feature.\n";
}

namespace {

Stmt call_halide_qurt_hvx_lock(const Target &target) {
Expr hvx_lock =
Call::make(Int(32), "halide_qurt_hvx_lock", {}, Call::Extern);
Expand Down Expand Up @@ -420,8 +519,6 @@ Stmt inject_hvx_lock_unlock(Stmt body, const Target &target) {
return body;
}

} // namespace

void CodeGen_Hexagon::compile_func(const LoweredFunc &f,
const string &simple_name,
const string &extern_name) {
Expand Down Expand Up @@ -489,8 +586,6 @@ void CodeGen_Hexagon::compile_func(const LoweredFunc &f,
CodeGen_Posix::end_func(f.args);
}

namespace {

struct HvxIntrinsic {
enum {
BroadcastScalarsToWords = 1 << 0, // Some intrinsics need scalar arguments
Expand Down Expand Up @@ -788,8 +883,6 @@ const HvxIntrinsic intrinsic_wrappers[] = {
// need to be implemented in the runtime module, or via
// fall-through to CodeGen_LLVM.

} // namespace

void CodeGen_Hexagon::init_module() {
CodeGen_Posix::init_module();

Expand Down Expand Up @@ -1022,8 +1115,6 @@ Value *CodeGen_Hexagon::interleave_vectors(const vector<llvm::Value *> &v) {
return CodeGen_Posix::interleave_vectors(v);
}

namespace {

// Check if indices form a strided ramp, allowing undef elements to
// pretend to be part of the ramp.
bool is_strided_ramp(const vector<int> &indices, int &start, int &stride) {
Expand Down Expand Up @@ -1086,8 +1177,6 @@ bool is_concat_or_slice(const vector<int> &indices) {
return true;
}

} // namespace

Value *CodeGen_Hexagon::shuffle_vectors(Value *a, Value *b,
const vector<int> &indices) {
llvm::Type *a_ty = a->getType();
Expand Down Expand Up @@ -1559,7 +1648,7 @@ Value *CodeGen_Hexagon::vdelta(Value *lut, const vector<int> &indices) {
return vlut(lut, indices);
}

static Value *create_vector(llvm::Type *ty, int val) {
Value *create_vector(llvm::Type *ty, int val) {
llvm::Type *scalar_ty = ty->getScalarType();
Constant *value = ConstantInt::get(scalar_ty, val);
return ConstantVector::getSplat(element_count(get_vector_num_elements(ty)), value);
Expand Down Expand Up @@ -1755,8 +1844,6 @@ int CodeGen_Hexagon::native_vector_bits() const {
return 128 * 8;
}

namespace {

Expr maybe_scalar(Expr x) {
const Broadcast *xb = x.as<Broadcast>();
if (xb) {
Expand All @@ -1766,8 +1853,6 @@ Expr maybe_scalar(Expr x) {
}
}

} // namespace

void CodeGen_Hexagon::visit(const Mul *op) {
if (op->type.is_vector()) {
value =
Expand Down Expand Up @@ -2225,5 +2310,21 @@ void CodeGen_Hexagon::visit(const Allocate *alloc) {
}
}

} // namespace

/** Factory for the Hexagon backend: heap-allocates a CodeGen_Hexagon
 * for `target`, attaches the supplied LLVM context to it, and hands
 * it back through a CodeGen_Posix pointer.
 * NOTE(review): the result comes from a raw `new`, so the caller
 * presumably takes ownership -- confirm at the call site. */
CodeGen_Posix *new_CodeGen_Hexagon(const Target &target, llvm::LLVMContext &context) {
    auto *codegen = new CodeGen_Hexagon(target);
    codegen->set_context(context);
    return codegen;
}

#else // WITH_HEXAGON

/** Stub used when Halide is built without Hexagon support: it only
 * reports a user error. Both parameters are intentionally unused. */
CodeGen_Posix *new_CodeGen_Hexagon(const Target &target, llvm::LLVMContext &context) {
    user_error << "hexagon not enabled for this build of Halide.\n";
    // user_error is expected not to return normally, but the compiler
    // cannot know that: without an explicit return, control would flow
    // off the end of a value-returning function (undefined behavior,
    // and a -Wreturn-type warning).
    return nullptr;
}

#endif // WITH_HEXAGON

} // namespace Internal
} // namespace Halide
104 changes: 8 additions & 96 deletions src/CodeGen_Hexagon.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,109 +5,21 @@
* Defines the code-generator for producing Hexagon machine code
*/

#include "CodeGen_Posix.h"
namespace llvm {

namespace Halide {
namespace Internal {

/** A code generator that emits Hexagon code from a given Halide stmt. */
class CodeGen_Hexagon : public CodeGen_Posix {
public:
/** Create a Hexagon code generator for the given Hexagon target. */
CodeGen_Hexagon(Target);

protected:
void compile_func(const LoweredFunc &f,
const std::string &simple_name, const std::string &extern_name) override;

void init_module() override;

std::string mcpu() const override;
std::string mattrs() const override;
int isa_version;
bool use_soft_float_abi() const override;
int native_vector_bits() const override;

llvm::Function *define_hvx_intrinsic(llvm::Function *intrin, Type ret_ty,
const std::string &name,
std::vector<Type> arg_types,
int flags);

int is_hvx_v65_or_later() const {
return (isa_version >= 65);
}
class LLVMContext;

using CodeGen_Posix::visit;
}

/** Nodes for which we want to emit specific hexagon intrinsics */
///@{
void visit(const Max *) override;
void visit(const Min *) override;
void visit(const Call *) override;
void visit(const Mul *) override;
void visit(const Select *) override;
void visit(const Allocate *) override;
///@}

/** We ask for an extra vector on each allocation to enable fast
* clamped ramp loads. */
int allocation_padding(Type type) const override {
return CodeGen_Posix::allocation_padding(type) + native_vector_bits() / 8;
}

/** Call an LLVM intrinsic, potentially casting the operands to
* match the type of the function. */
///@{
llvm::Value *call_intrin_cast(llvm::Type *ret_ty, llvm::Function *F,
std::vector<llvm::Value *> Ops);
llvm::Value *call_intrin_cast(llvm::Type *ret_ty, int id,
std::vector<llvm::Value *> Ops);
///@}

/** Define overloads of CodeGen_LLVM::call_intrin that determine
* the intrin_lanes from the type, and allows the function to
* return null if the maybe option is true and the intrinsic is
* not found. */
///@{
using CodeGen_LLVM::call_intrin;
llvm::Value *call_intrin(Type t, const std::string &name,
std::vector<Expr>, bool maybe = false);
llvm::Value *call_intrin(llvm::Type *t, const std::string &name,
std::vector<llvm::Value *>, bool maybe = false);
///@}

/** Override CodeGen_LLVM to use hexagon intrinsics when possible. */
///@{
llvm::Value *interleave_vectors(const std::vector<llvm::Value *> &v) override;
llvm::Value *shuffle_vectors(llvm::Value *a, llvm::Value *b,
const std::vector<int> &indices) override;
using CodeGen_Posix::shuffle_vectors;
///@}

/** Generate a LUT lookup using vlut instructions. */
///@{
llvm::Value *vlut(llvm::Value *lut, llvm::Value *indices, int min_index = 0, int max_index = 1 << 30);
llvm::Value *vlut(llvm::Value *lut, const std::vector<int> &indices);
///@}
namespace Halide {

llvm::Value *vdelta(llvm::Value *lut, const std::vector<int> &indices);
struct Target;

/** Because HVX intrinsics operate on vectors of i32, using them
* requires a lot of extraneous bitcasts, which make it difficult
* to manipulate the IR. This function avoids generating redundant
* bitcasts. */
llvm::Value *create_bitcast(llvm::Value *v, llvm::Type *ty);
namespace Internal {

private:
/** Generates code for computing the size of an allocation from a
* list of its extents and its size. Fires a runtime assert
* (halide_error) if the size overflows 2^31 -1, the maximum
* positive number an int32_t can hold. */
llvm::Value *codegen_cache_allocation_size(const std::string &name, Type type, const std::vector<Expr> &extents);
class CodeGen_Posix;

/** Generate a LUT (8/16 bit, max_index < 256) lookup using vlut instructions. */
llvm::Value *vlut256(llvm::Value *lut, llvm::Value *indices, int min_index = 0, int max_index = 255);
};
CodeGen_Posix *new_CodeGen_Hexagon(const Target &target, llvm::LLVMContext &context);

} // namespace Internal
} // namespace Halide
Expand Down
2 changes: 1 addition & 1 deletion src/CodeGen_LLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ CodeGen_LLVM *CodeGen_LLVM::new_for_target(const Target &target,
} else if (target.arch == Target::POWERPC) {
return make_codegen<CodeGen_PowerPC>(target, context);
} else if (target.arch == Target::Hexagon) {
return make_codegen<CodeGen_Hexagon>(target, context);
return new_CodeGen_Hexagon(target, context);
} else if (target.arch == Target::WebAssembly) {
return make_codegen<CodeGen_WebAssembly>(target, context);
} else if (target.arch == Target::RISCV) {
Expand Down