Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python_bindings/src/PyImageParam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ void define_image_param(py::module &m) {
.def("host_alignment", &OutputImageParam::host_alignment)
.def("set_estimates", &OutputImageParam::set_estimates, py::arg("estimates"))
.def("set_host_alignment", &OutputImageParam::set_host_alignment)
.def("is_host_aligned", &OutputImageParam::is_host_aligned)
.def("store_in", &OutputImageParam::store_in, py::arg("memory_type"))
.def("dimensions", &OutputImageParam::dimensions)
.def("left", &OutputImageParam::left)
Expand Down
9 changes: 6 additions & 3 deletions src/AddImageChecks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -637,8 +637,11 @@ Stmt add_image_checks_inner(Stmt s,
int alignment_required = param.host_alignment();
Expr u64t_host_ptr = reinterpret<uint64_t>(host_ptr);
Expr align_condition = (u64t_host_ptr % alignment_required) == 0;
Expr error = Call::make(Int(32), "halide_error_unaligned_host_ptr",
{name, alignment_required}, Call::Extern);
Expr error = 0;
if (!no_asserts) {
error = Call::make(Int(32), "halide_error_unaligned_host_ptr",
{name, alignment_required}, Call::Extern);
}
asserts_host_alignment.push_back(AssertStmt::make(align_condition, error));
}
}
Expand All @@ -661,7 +664,6 @@ Stmt add_image_checks_inner(Stmt s,
if (!no_asserts) {
// Inject the code that checks the host pointers.
prepend_stmts(&asserts_host_non_null);
prepend_stmts(&asserts_host_alignment);
prepend_stmts(&asserts_device_not_dirty);
prepend_stmts(&dims_no_overflow_asserts);
prepend_lets(&lets_overflow);
Expand All @@ -680,6 +682,7 @@ Stmt add_image_checks_inner(Stmt s,
// Inject the code that checks the constraints are correct. We
// need these regardless of how NoAsserts is set, because they are
// what gets Halide to actually exploit the constraint.
prepend_stmts(&asserts_host_alignment);
prepend_stmts(&asserts_constrained);

if (!no_asserts) {
Expand Down
15 changes: 5 additions & 10 deletions src/AlignLoads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

#include "AlignLoads.h"
#include "Bounds.h"
#include "HexagonAlignment.h"
#include "IRMutator.h"
#include "IROperator.h"
#include "ModulusRemainder.h"
Expand All @@ -22,12 +21,10 @@ namespace {
class AlignLoads : public IRMutator {
public:
AlignLoads(int alignment)
: alignment_analyzer(alignment), required_alignment(alignment) {
: required_alignment(alignment) {
}

private:
HexagonAlignmentAnalyzer alignment_analyzer;

// Loads and stores should ideally be aligned to the vector width in bytes.
int required_alignment;

Expand Down Expand Up @@ -75,14 +72,12 @@ class AlignLoads : public IRMutator {
return IRMutator::visit(op);
}

int64_t aligned_offset = 0;
bool is_aligned =
alignment_analyzer.is_aligned(op, &aligned_offset);
// We know the alignment_analyzer has been able to reason about alignment
// if the following is true.
bool known_alignment = is_aligned || (!is_aligned && aligned_offset != 0);
int lanes = ramp->lanes;
int native_lanes = required_alignment / op->type.bytes();
int64_t aligned_offset =
op->alignment.modulus % native_lanes == 0 ? op->alignment.remainder % native_lanes : 0;
bool is_aligned = op->alignment.contains(native_lanes);
bool known_alignment = is_aligned || aligned_offset != 0;
int stride = static_cast<int>(*const_stride);
if (stride != 1) {
internal_assert(stride >= 0);
Expand Down
35 changes: 0 additions & 35 deletions src/CodeGen_LLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -581,9 +581,6 @@ void CodeGen_LLVM::begin_func(LinkageType linkage, const std::string &name,
size_t i = 0;
for (auto &arg : function->args()) {
if (args[i].is_buffer()) {
// Track this buffer name so that loads and stores from it
// don't try to be too aligned.
external_buffer.insert(args[i].name);
sym_push(args[i].name + ".buffer", &arg);
} else {
Type passed_type = upgrade_type_for_argument_passing(args[i].type);
Expand Down Expand Up @@ -2169,7 +2166,6 @@ void CodeGen_LLVM::codegen_predicated_vector_store(const Store *op) {
Value *vpred = codegen(op->predicate);
Halide::Type value_type = op->value.type();
Value *val = codegen(op->value);
bool is_external = (external_buffer.find(op->name) != external_buffer.end());
int alignment = value_type.bytes();
int native_bits = native_vector_bits();
int native_bytes = native_bits / 8;
Expand All @@ -2184,14 +2180,6 @@ void CodeGen_LLVM::codegen_predicated_vector_store(const Store *op) {
alignment *= 2;
}

// If it is an external buffer, then we cannot assume that the host pointer
// is aligned to at least the native vector width. However, we may be able to do
// better than just assuming that it is unaligned.
if (is_external && op->param.defined()) {
int host_alignment = op->param.host_alignment();
alignment = gcd(alignment, host_alignment);
}

// For dense vector stores wider than the native vector
// width, bust them up into native vectors.
int store_lanes = value_type.lanes();
Expand Down Expand Up @@ -2255,7 +2243,6 @@ Value *CodeGen_LLVM::codegen_dense_vector_load(const Load *load, Value *vpred) {
const Ramp *ramp = load->index.as<Ramp>();
internal_assert(ramp && is_const_one(ramp->stride)) << "Should be dense vector load\n";

bool is_external = (external_buffer.find(load->name) != external_buffer.end());
int alignment = load->type.bytes(); // The size of a single element

int native_bits = native_vector_bits();
Expand All @@ -2275,19 +2262,6 @@ Value *CodeGen_LLVM::codegen_dense_vector_load(const Load *load, Value *vpred) {
alignment *= 2;
}

// If it is an external buffer, then we cannot assume that the host pointer
// is aligned to at least native vector width. However, we may be able to do
// better than just assuming that it is unaligned.
if (is_external) {
if (load->param.defined()) {
int host_alignment = load->param.host_alignment();
alignment = gcd(alignment, host_alignment);
} else if (get_target().has_feature(Target::JIT) && load->image.defined()) {
// If we're JITting, use the actual pointer value to determine alignment for embedded buffers.
alignment = gcd(alignment, (int)(((uintptr_t)load->image.data()) & std::numeric_limits<int>::max()));
}
}

// For dense vector loads wider than the native vector
// width, bust them up into native vectors
int load_lanes = load->type.lanes();
Expand Down Expand Up @@ -3979,7 +3953,6 @@ void CodeGen_LLVM::visit(const Store *op) {
}

Value *val = codegen(op->value);
bool is_external = (external_buffer.find(op->name) != external_buffer.end());
// Scalar
if (value_type.is_scalar()) {
Value *ptr = codegen_buffer_pointer(op->name, value_type, op->index);
Expand All @@ -4006,14 +3979,6 @@ void CodeGen_LLVM::visit(const Store *op) {
alignment *= 2;
}

// If it is an external buffer, then we cannot assume that the host pointer
// is aligned to at least the native vector width. However, we may be able to do
// better than just assuming that it is unaligned.
if (is_external && op->param.defined()) {
int host_alignment = op->param.host_alignment();
alignment = gcd(alignment, host_alignment);
}

// For dense vector stores wider than the native vector
// width, bust them up into native vectors.
int store_lanes = value_type.lanes();
Expand Down
4 changes: 0 additions & 4 deletions src/CodeGen_LLVM.h
Original file line number Diff line number Diff line change
Expand Up @@ -414,10 +414,6 @@ class CodeGen_LLVM : public IRVisitor {
*/
size_t requested_alloca_total = 0;

/** Which buffers came in from the outside world (and so we can't
* guarantee their alignment) */
std::set<std::string> external_buffer;

/** The user_context argument. May be a constant null if the
* function is being compiled without a user context. */
llvm::Value *get_user_context() const;
Expand Down
6 changes: 2 additions & 4 deletions src/CodeGen_OpenCL_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,8 +491,7 @@ void CodeGen_OpenCL_Dev::CodeGen_OpenCL_C::visit(const Load *op) {
internal_assert(op->type.is_vector());

ostringstream rhs;
if ((op->alignment.modulus % op->type.lanes() == 0) &&
(op->alignment.remainder % op->type.lanes() == 0)) {
if (op->alignment.contains(op->type.lanes())) {
// Get the rhs just for the cache.
string id_ramp_base = print_expr(ramp_base / op->type.lanes());
string array_indexing = print_array_access(op->name, op->type, id_ramp_base);
Expand Down Expand Up @@ -658,8 +657,7 @@ void CodeGen_OpenCL_Dev::CodeGen_OpenCL_C::visit(const Store *op) {
if (ramp_base.defined()) {
internal_assert(op->value.type().is_vector());

if ((op->alignment.modulus % op->value.type().lanes() == 0) &&
(op->alignment.remainder % op->value.type().lanes() == 0)) {
if (op->alignment.contains(op->value.type().lanes())) {
string id_ramp_base = print_expr(ramp_base / op->value.type().lanes());
string array_indexing = print_array_access(op->name, t, id_ramp_base);
stream << get_indent() << array_indexing << " = " << id_value << ";\n";
Expand Down
7 changes: 3 additions & 4 deletions src/CodeGen_PTX_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ void CodeGen_PTX_Dev::visit(const Load *op) {
// TODO: lanes >= 4, not lanes == 4
if (is_const_one(op->predicate) && r && is_const_one(r->stride) && r->lanes == 4 && op->type.bits() == 32) {
ModulusRemainder align = op->alignment;
if (align.modulus % 4 == 0 && align.remainder % 4 == 0) {
if (align.contains(4)) {
Expr index = simplify(r->base / 4);
Expr equiv = Load::make(UInt(128), op->name, index,
op->image, op->param, const_true(), align / 4);
Expand All @@ -371,7 +371,7 @@ void CodeGen_PTX_Dev::visit(const Store *op) {
// TODO: lanes >= 4, not lanes == 4
if (is_const_one(op->predicate) && r && is_const_one(r->stride) && r->lanes == 4 && op->value.type().bits() == 32) {
ModulusRemainder align = op->alignment;
if (align.modulus % 4 == 0 && align.remainder % 4 == 0) {
if (align.contains(4)) {
Expr index = simplify(r->base / 4);
Expr value = reinterpret(UInt(128), op->value);
Stmt equiv = Store::make(op->name, value, index, op->param, const_true(), align / 4);
Expand Down Expand Up @@ -411,8 +411,7 @@ class RewriteLoadsAs32Bit : public IRMutator {
if (idx &&
is_const_one(op->predicate) &&
is_const_one(idx->stride) &&
op->alignment.modulus % sub_lanes == 0 &&
op->alignment.remainder % sub_lanes == 0) {
op->alignment.contains(sub_lanes)) {
Expr new_idx = simplify(idx->base / sub_lanes);
int load_lanes = op->type.lanes() / sub_lanes;
if (op->type.lanes() > sub_lanes) {
Expand Down
2 changes: 2 additions & 0 deletions src/Generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -1778,6 +1778,7 @@ class GeneratorInput_Buffer : public GeneratorInputImpl<T, Func> {
HALIDE_FORWARD_METHOD_CONST(ImageParam, dim)
HALIDE_FORWARD_METHOD_CONST(ImageParam, host_alignment)
HALIDE_FORWARD_METHOD(ImageParam, set_host_alignment)
HALIDE_FORWARD_METHOD(ImageParam, is_host_aligned)
HALIDE_FORWARD_METHOD(ImageParam, store_in)
HALIDE_FORWARD_METHOD_CONST(ImageParam, dimensions)
HALIDE_FORWARD_METHOD_CONST(ImageParam, left)
Expand Down Expand Up @@ -2521,6 +2522,7 @@ class GeneratorOutput_Buffer : public GeneratorOutputImpl<T> {
HALIDE_FORWARD_METHOD_CONST(OutputImageParam, dim)
HALIDE_FORWARD_METHOD_CONST(OutputImageParam, host_alignment)
HALIDE_FORWARD_METHOD(OutputImageParam, set_host_alignment)
HALIDE_FORWARD_METHOD(OutputImageParam, is_host_aligned)
HALIDE_FORWARD_METHOD(OutputImageParam, store_in)
HALIDE_FORWARD_METHOD_CONST(OutputImageParam, dimensions)
HALIDE_FORWARD_METHOD_CONST(OutputImageParam, left)
Expand Down
69 changes: 0 additions & 69 deletions src/HexagonAlignment.h

This file was deleted.

20 changes: 7 additions & 13 deletions src/HexagonOptimize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "ConciseCasts.h"
#include "ExprUsesVar.h"
#include "FindIntrinsics.h"
#include "HexagonAlignment.h"
#include "IREquality.h"
#include "IRMatch.h"
#include "IRMutator.h"
Expand Down Expand Up @@ -1419,10 +1418,7 @@ class EliminateInterleaves : public IRMutator {
Scope<bool> vars;

// We need to know when loads are a multiple of 2 native vectors.
int native_vector_bits;

// Alignment analyzer for loads and stores
HexagonAlignmentAnalyzer alignment_analyzer;
const int native_vector_bytes;

// Check if x is an expression that is either an interleave, or
// transitively is an interleave.
Expand Down Expand Up @@ -1884,9 +1880,8 @@ class EliminateInterleaves : public IRMutator {
}
internal_assert(aligned_buffer_access.contains(op->name) && "Buffer not found in scope");
bool &aligned_accesses = aligned_buffer_access.ref(op->name);
int64_t aligned_offset = 0;

if (!alignment_analyzer.is_aligned(op, &aligned_offset)) {
// Number of elements of the stored type that fit in one native vector.
const int native_vector_lanes = native_vector_bytes / value.type().bytes();
// aligned_accesses must stay true only while every access to this
// buffer is known to be aligned to a native vector, so clear it when
// this store is NOT known to be aligned. The condition has to be
// negated: contains(n) holds when the access is aligned to n lanes,
// which is the opposite of the case we want to flag.
if (!op->alignment.contains(native_vector_lanes)) {
    aligned_accesses = false;
}
}
Expand All @@ -1906,7 +1901,7 @@ class EliminateInterleaves : public IRMutator {

Expr visit(const Load *op) override {
if (buffers.contains(op->name)) {
if ((op->type.lanes() * op->type.bits()) % (native_vector_bits * 2) == 0) {
if ((op->type.lanes() * op->type.bytes()) % (native_vector_bytes * 2) == 0) {
// This is a double vector load, we might be able to
// deinterleave the storage of this buffer.
// We don't want to actually do anything to the buffer
Expand All @@ -1918,9 +1913,8 @@ class EliminateInterleaves : public IRMutator {
// interleave).
internal_assert(aligned_buffer_access.contains(op->name) && "Buffer not found in scope");
bool &aligned_accesses = aligned_buffer_access.ref(op->name);
int64_t aligned_offset = 0;

if (!alignment_analyzer.is_aligned(op, &aligned_offset)) {
// Number of elements of the loaded type that fit in one native vector.
const int native_vector_lanes = native_vector_bytes / op->type.bytes();
// aligned_accesses must stay true only while every access to this
// buffer is known to be aligned to a native vector, so clear it when
// this load is NOT known to be aligned. The condition has to be
// negated: contains(n) holds when the access is aligned to n lanes,
// which is the opposite of the case we want to flag.
if (!op->alignment.contains(native_vector_lanes)) {
    aligned_accesses = false;
}
} else {
Expand All @@ -1941,7 +1935,7 @@ class EliminateInterleaves : public IRMutator {

public:
EliminateInterleaves(int native_vector_bytes)
: native_vector_bits(native_vector_bytes * 8), alignment_analyzer(native_vector_bytes) {
: native_vector_bytes(native_vector_bytes) {
}
};

Expand Down
6 changes: 3 additions & 3 deletions src/IR.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,8 @@ struct Load : public ExprNode<Load> {
// If it's a load from an image parameter, this points to that
Parameter param;

// The alignment of the index. If the index is a vector, this is
// the alignment of the first lane.
// The alignment of the loaded address. If the index is a vector,
// this is the alignment of the first lane.
ModulusRemainder alignment;

static Expr make(Type type, const std::string &name,
Expand Down Expand Up @@ -318,7 +318,7 @@ struct Store : public StmtNode<Store> {
// If it's a store to an output buffer, then this parameter points to it.
Parameter param;

// The alignment of the index. If the index is a vector, this is
// The alignment of the stored address. If the index is a vector, this is
// the alignment of the first lane.
ModulusRemainder alignment;

Expand Down
Loading