Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
7d70656
Remove unused vertex buffer parameters.
dsharletg Feb 26, 2021
041b9fe
Offload GPU code in a lowering pass instead of via CodeGen_GPU_Host. …
dsharletg Feb 26, 2021
c87f37e
clang-format.
dsharletg Feb 27, 2021
797e43e
clang-format sorting is case sensitive!?
dsharletg Feb 27, 2021
da6effb
clang-tidy
dsharletg Feb 27, 2021
ebbb9a5
Move codegen backends into anonymous namespaces in source files.
dsharletg Feb 27, 2021
2ab52cb
clang-format
dsharletg Feb 27, 2021
9e574ea
Merge branch 'master' of github.com:halide/Halide into dsharletg/hide…
dsharletg Feb 27, 2021
38a6c43
Merge branch 'master' of github.com:halide/Halide into dsharletg/offl…
dsharletg Feb 27, 2021
7c0c5dd
Pass type arguments correctly.
dsharletg Feb 27, 2021
9024533
Update OffloadGPULoops.cpp
steven-johnson Feb 27, 2021
25107fb
trigger buildbots
steven-johnson Feb 28, 2021
571fda3
trigger buildbots
steven-johnson Feb 28, 2021
a84435f
Merge branch 'master' into dsharletg/hide-codegen2
steven-johnson Feb 28, 2021
8282586
Merge branch 'master' into dsharletg/offload-gpu
steven-johnson Feb 28, 2021
bf62dc4
Hack around tests that rely on the IR for offloaded GPU loops.
dsharletg Feb 28, 2021
1f739a4
Merge branch 'dsharletg/offload-gpu' of github.com:halide/Halide into…
dsharletg Feb 28, 2021
5ee9236
Fix missing include.
dsharletg Feb 28, 2021
dc7e61d
Remove unused include.
dsharletg Feb 28, 2021
4a59095
clang-tidy
dsharletg Feb 28, 2021
7fb6cb2
Merge branch 'dsharletg/offload-gpu' of github.com:halide/Halide into…
dsharletg Feb 28, 2021
db88411
Use custom lowering pass to see code before GPU offloading
dsharletg Mar 1, 2021
18b459b
Speculative fix for segfault
dsharletg Mar 1, 2021
10bb8d8
Merge branch 'dsharletg/offload-gpu' into dsharletg/hide-codegen2
dsharletg Mar 1, 2021
04d4a1f
Fix const correctness
dsharletg Mar 1, 2021
7abf29d
Fix error on unused variables in generated code.
dsharletg Mar 1, 2021
895b355
Merge branch 'dsharletg/offload-gpu' into dsharletg/hide-codegen2
dsharletg Mar 1, 2021
ebf7cad
Merge branch 'master' of github.com:halide/Halide into dsharletg/hide…
dsharletg Mar 2, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -591,23 +591,18 @@ HEADER_FILES = \
Buffer.h \
CanonicalizeGPUVars.h \
Closure.h \
CodeGen_ARM.h \
CodeGen_C.h \
CodeGen_D3D12Compute_Dev.h \
CodeGen_GPU_Dev.h \
CodeGen_Internal.h \
CodeGen_LLVM.h \
CodeGen_Metal_Dev.h \
CodeGen_MIPS.h \
CodeGen_OpenCL_Dev.h \
CodeGen_OpenGLCompute_Dev.h \
CodeGen_Posix.h \
CodeGen_PowerPC.h \
CodeGen_PTX_Dev.h \
CodeGen_PyTorch.h \
CodeGen_RISCV.h \
CodeGen_WebAssembly.h \
CodeGen_X86.h \
CodeGen_Targets.h \
CompilerLogger.h \
ConciseCasts.h \
CPlusPlusMangle.h \
Expand Down Expand Up @@ -1091,7 +1086,7 @@ $(BUILD_DIR)/initmod_ptx.%_ll.o: $(BUILD_DIR)/initmod_ptx.%_ll.cpp
$(BUILD_DIR)/initmod.%.o: $(BUILD_DIR)/initmod.%.cpp
$(CXX) -c $< -o $@ -MMD -MP -MF $(BUILD_DIR)/$*.d -MT $(BUILD_DIR)/$*.o

$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp $(SRC_DIR)/%.h $(BUILD_DIR)/llvm_ok
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp $(BUILD_DIR)/llvm_ok
@mkdir -p $(@D)
$(CXX) $(CXX_FLAGS) -c $< -o $@ -MMD -MP -MF $(BUILD_DIR)/$*.d -MT $(BUILD_DIR)/$*.o

Expand Down
7 changes: 1 addition & 6 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,18 @@ set(HEADER_FILES
Buffer.h
CanonicalizeGPUVars.h
Closure.h
CodeGen_ARM.h
CodeGen_C.h
CodeGen_D3D12Compute_Dev.h
CodeGen_GPU_Dev.h
CodeGen_Internal.h
CodeGen_LLVM.h
CodeGen_Metal_Dev.h
CodeGen_MIPS.h
CodeGen_OpenCL_Dev.h
CodeGen_OpenGLCompute_Dev.h
CodeGen_Posix.h
CodeGen_PowerPC.h
CodeGen_PTX_Dev.h
CodeGen_PyTorch.h
CodeGen_RISCV.h
CodeGen_WebAssembly.h
CodeGen_X86.h
CodeGen_Targets.h
CompilerLogger.h
ConciseCasts.h
CPlusPlusMangle.h
Expand Down
106 changes: 78 additions & 28 deletions src/CodeGen_ARM.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#include <iostream>
#include <sstream>

#include "CSE.h"
#include "CodeGen_ARM.h"
#include "CodeGen_Internal.h"
#include "CodeGen_Posix.h"
#include "ConciseCasts.h"
#include "Debug.h"
#include "IREquality.h"
Expand All @@ -25,28 +24,68 @@ using std::vector;
using namespace Halide::ConciseCasts;
using namespace llvm;

#if defined(WITH_ARM) || defined(WITH_AARCH64)

namespace {

// Helper for writing patterns: wraps x in a Broadcast node, passing 0 as the
// lane count to stand for an unknown number of lanes.
Expr bc(Expr x) {
    Expr broadcast = Broadcast::make(std::move(x), 0);
    return broadcast;
}

} // namespace
/** Code generator that emits ARM code from a given Halide stmt. Built on top
 * of CodeGen_Posix, overriding the visitors and target queries listed below. */
class CodeGen_ARM : public CodeGen_Posix {
public:
    /** Construct an ARM code generator for the supplied arm target. */
    CodeGen_ARM(const Target &);

protected:
    // Keep the base-class visit() overloads visible next to the overrides
    // declared in this class.
    using CodeGen_Posix::visit;

    /** Given 'inner', a function taking two vector arguments, define a wrapper
     * named 'name' that takes a single vector argument and splits it in two in
     * order to call inner. */
    llvm::Function *define_concat_args_wrapper(llvm::Function *inner, const string &name);
    void init_module() override;

    /** IR nodes handled specially so that specific neon intrinsics can be emitted. */
    // @{
    void visit(const Cast *) override;
    void visit(const Sub *) override;
    void visit(const Mul *) override;
    void visit(const Min *) override;
    void visit(const Max *) override;
    void visit(const Store *) override;
    void visit(const Load *) override;
    void visit(const Call *) override;
    void visit(const LT *) override;
    void visit(const LE *) override;
    void codegen_vector_reduce(const VectorReduce *, const Expr &) override;
    // @}

    /** One peephole rule: an expression to match, paired with the name of the
     * intrinsic associated with it. */
    struct Pattern {
        string intrin;  ///< Name of the intrinsic
        Expr pattern;   ///< The pattern to match against
        Pattern() = default;
        Pattern(const string &intrin_name, Expr to_match)
            : intrin(intrin_name), pattern(std::move(to_match)) {
        }
    };

    // Pattern tables, one per category of peephole rule.
    vector<Pattern> casts;
    vector<Pattern> averagings;
    vector<Pattern> negations;

    string mcpu() const override;
    string mattrs() const override;
    bool use_soft_float_abi() const override;
    int native_vector_bits() const override;

    /** Reports whether the target has the NoNEON feature set; NEON can be
     * disabled for older processors. */
    bool neon_intrinsics_disabled() {
        return target.has_feature(Target::NoNEON);
    }
};

CodeGen_ARM::CodeGen_ARM(const Target &target)
: CodeGen_Posix(target) {
if (target.bits == 32) {
#if !defined(WITH_ARM)
user_error << "arm not enabled for this build of Halide.";
#endif
user_assert(llvm_ARM_enabled) << "llvm build not configured with ARM target enabled\n.";
} else {
#if !defined(WITH_AARCH64)
user_error << "aarch64 not enabled for this build of Halide.";
#endif
user_assert(llvm_AArch64_enabled) << "llvm build not configured with AArch64 target enabled.\n";
}

// RADDHN - Add and narrow with rounding
// These must come before other narrowing rounding shift patterns
Expand Down Expand Up @@ -162,8 +201,6 @@ CodeGen_ARM::CodeGen_ARM(const Target &target)
// clang-format on
}

namespace {

constexpr int max_intrinsic_args = 4;

struct ArmIntrinsic {
Expand Down Expand Up @@ -512,9 +549,7 @@ const ArmIntrinsic intrinsic_defs[] = {
};
// clang-format on

} // namespace

llvm::Function *CodeGen_ARM::define_concat_args_wrapper(llvm::Function *inner, const std::string &name) {
llvm::Function *CodeGen_ARM::define_concat_args_wrapper(llvm::Function *inner, const string &name) {
llvm::FunctionType *inner_ty = inner->getFunctionType();

internal_assert(inner_ty->getNumParams() == 2);
Expand Down Expand Up @@ -558,7 +593,7 @@ void CodeGen_ARM::init_module() {
return;
}

std::string prefix = target.bits == 32 ? "llvm.arm.neon." : "llvm.aarch64.neon.";
string prefix = target.bits == 32 ? "llvm.arm.neon." : "llvm.aarch64.neon.";
for (const ArmIntrinsic &intrin : intrinsic_defs) {
// Get the name of the intrinsic with the appropriate prefix.
const char *intrin_name = nullptr;
Expand All @@ -570,13 +605,13 @@ void CodeGen_ARM::init_module() {
if (!intrin_name) {
continue;
}
std::string full_name = intrin_name;
string full_name = intrin_name;
if (!starts_with(full_name, "llvm.")) {
full_name = prefix + full_name;
}

// We might have to generate versions of this intrinsic with multiple widths.
std::vector<int> width_factors = {1};
vector<int> width_factors = {1};
if (intrin.flags & ArmIntrinsic::HalfWidth) {
width_factors.push_back(2);
}
Expand All @@ -585,7 +620,7 @@ void CodeGen_ARM::init_module() {
Type ret_type = intrin.ret_type;
ret_type = ret_type.with_lanes(ret_type.lanes() * width_factor);
internal_assert(ret_type.bits() * ret_type.lanes() <= 128) << full_name << "\n";
std::vector<Type> arg_types;
vector<Type> arg_types;
arg_types.reserve(4);
for (halide_type_t i : intrin.arg_types) {
if (i.bits == 0) {
Expand All @@ -603,7 +638,7 @@ void CodeGen_ARM::init_module() {
mangled_name_builder << full_name;
if (starts_with(full_name, "llvm.") && (intrin.flags & ArmIntrinsic::NoMangle) == 0) {
// Append LLVM name mangling for either the return type or the arguments, or both.
std::vector<Type> types;
vector<Type> types;
if (intrin.flags & ArmIntrinsic::MangleArgs) {
types = arg_types;
} else if (intrin.flags & ArmIntrinsic::MangleRetArgs) {
Expand All @@ -622,12 +657,12 @@ void CodeGen_ARM::init_module() {
mangled_name_builder << t.bits();
}
}
std::string mangled_name = mangled_name_builder.str();
string mangled_name = mangled_name_builder.str();

llvm::Function *intrin_impl = nullptr;
if (intrin.flags & ArmIntrinsic::SplitArg0) {
// This intrinsic needs a wrapper to split the argument.
std::string wrapper_name = intrin.name + unique_name("_wrapper");
string wrapper_name = intrin.name + unique_name("_wrapper");
Type split_arg_type = arg_types[0].with_lanes(arg_types[0].lanes() / 2);
llvm::Function *to_wrap = get_llvm_intrin(ret_type, mangled_name, {split_arg_type, split_arg_type});
intrin_impl = define_concat_args_wrapper(to_wrap, wrapper_name);
Expand Down Expand Up @@ -1178,7 +1213,7 @@ void CodeGen_ARM::codegen_vector_reduce(const VectorReduce *op, const Expr &init
// clang-format on

int factor = op->value.type().lanes() / op->type.lanes();
std::vector<Expr> matches;
vector<Expr> matches;
for (const Pattern &p : patterns) {
if (op->op != p.reduce_op || factor % p.factor != 0) {
continue;
Expand Down Expand Up @@ -1208,7 +1243,7 @@ void CodeGen_ARM::codegen_vector_reduce(const VectorReduce *op, const Expr &init
// TODO: Move this to be patterns? The patterns are pretty trivial, but some
// of the other logic is tricky.
const char *intrin = nullptr;
std::vector<Expr> intrin_args;
vector<Expr> intrin_args;
Expr accumulator = init;
if (op->op == VectorReduce::Add && factor == 2) {
Type narrow_type = op->type.narrow().with_lanes(op->value.type().lanes());
Expand Down Expand Up @@ -1340,5 +1375,20 @@ int CodeGen_ARM::native_vector_bits() const {
return 128;
}

} // namespace

// Factory for the ARM backend: builds a CodeGen_ARM for `target` and hands it
// back as an owning pointer to the CodeGen_Posix base.
std::unique_ptr<CodeGen_Posix> new_CodeGen_ARM(const Target &target) {
    std::unique_ptr<CodeGen_Posix> codegen = std::make_unique<CodeGen_ARM>(target);
    return codegen;
}

#else // WITH_ARM || WITH_AARCH64

// Stub factory compiled when neither WITH_ARM nor WITH_AARCH64 is defined:
// reports a user error and yields an empty pointer.
// NOTE(review): presumably user_error does not return normally, making the
// return statement unreachable in practice — confirm against its definition.
std::unique_ptr<CodeGen_Posix> new_CodeGen_ARM(const Target &target) {
    user_error << "ARM not enabled for this build of Halide.\n";
    return {};
}

#endif // WITH_ARM || WITH_AARCH64

} // namespace Internal
} // namespace Halide
72 changes: 0 additions & 72 deletions src/CodeGen_ARM.h

This file was deleted.

17 changes: 3 additions & 14 deletions src/CodeGen_Hexagon.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
#include "CodeGen_Hexagon.h"

#include <iostream>
#include <mutex>
#include <sstream>
#include <utility>

Expand All @@ -12,14 +8,11 @@
#include "Debug.h"
#include "HexagonOptimize.h"
#include "IREquality.h"
#include "IRMatch.h"
#include "IRMutator.h"
#include "IROperator.h"
#include "IRPrinter.h"
#include "LICM.h"
#include "LLVM_Headers.h"
#include "LoopCarry.h"
#include "Monotonic.h"
#include "Simplify.h"
#include "Substitute.h"
#include "Target.h"
Expand Down Expand Up @@ -138,8 +131,6 @@ class CodeGen_Hexagon : public CodeGen_Posix {

CodeGen_Hexagon::CodeGen_Hexagon(const Target &t)
: CodeGen_Posix(t) {
user_assert(llvm_Hexagon_enabled)
<< "llvm build not configured with Hexagon target enabled.\n";
if (target.has_feature(Halide::Target::HVX_v66)) {
isa_version = 66;
} else if (target.has_feature(Halide::Target::HVX_v65)) {
Expand Down Expand Up @@ -2326,15 +2317,13 @@ void CodeGen_Hexagon::visit(const Allocate *alloc) {

} // namespace

std::unique_ptr<CodeGen_Posix> new_CodeGen_Hexagon(const Target &target, llvm::LLVMContext &context) {
std::unique_ptr<CodeGen_Posix> ret(std::make_unique<CodeGen_Hexagon>(target));
ret->set_context(context);
return ret;
std::unique_ptr<CodeGen_Posix> new_CodeGen_Hexagon(const Target &target) {
return std::make_unique<CodeGen_Hexagon>(target);
}

#else // WITH_HEXAGON

std::unique_ptr<CodeGen_Posix> new_CodeGen_Hexagon(const Target &target, llvm::LLVMContext &context) {
std::unique_ptr<CodeGen_Posix> new_CodeGen_Hexagon(const Target &target) {
user_error << "hexagon not enabled for this build of Halide.\n";
return nullptr;
}
Expand Down
Loading