Skip to content
20 changes: 10 additions & 10 deletions src/AsyncProducers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,9 @@ class GenerateProducerBody : public NoOpCollapsingMutator {
} else {
// This semaphore will end up on both sides of the fork,
// so we'd better duplicate it.
string cloned_acquire = var->name + unique_name('_');
cloned_acquires[var->name] = cloned_acquire;
return Acquire::make(Variable::make(type_of<halide_semaphore_t *>(), cloned_acquire), op->count, body);
vector<string> &clones = cloned_acquires[var->name];
clones.push_back(var->name + unique_name('_'));
return Acquire::make(Variable::make(type_of<halide_semaphore_t *>(), clones.back()), op->count, body);
}
}

Expand All @@ -192,11 +192,11 @@ class GenerateProducerBody : public NoOpCollapsingMutator {
return op;
}

map<string, string> &cloned_acquires;
map<string, vector<string>> &cloned_acquires;
set<string> inner_semaphores;

public:
GenerateProducerBody(const string &f, const vector<Expr> &s, map<string, string> &a)
GenerateProducerBody(const string &f, const vector<Expr> &s, map<string, vector<string>> &a)
: func(f), sema(s), cloned_acquires(a) {
}
};
Expand Down Expand Up @@ -311,7 +311,7 @@ class ForkAsyncProducers : public IRMutator {

const map<string, Function> &env;

map<string, string> cloned_acquires;
map<string, vector<string>> cloned_acquires;

Stmt visit(const Realize *op) override {
auto it = env.find(op->name);
Expand Down Expand Up @@ -354,10 +354,10 @@ class ForkAsyncProducers : public IRMutator {
// If there's a nested async producer, we may have
// recursively cloned this semaphore inside the mutation
// of the producer and consumer.
auto it = cloned_acquires.find(sema_name);
if (it != cloned_acquires.end()) {
body = CloneAcquire(sema_name, it->second).mutate(body);
body = LetStmt::make(it->second, sema_space, body);
const vector<string> &clones = cloned_acquires[sema_name];
for (const auto &i : clones) {
body = CloneAcquire(sema_name, i).mutate(body);
body = LetStmt::make(i, sema_space, body);
}

body = LetStmt::make(sema_name, sema_space, body);
Expand Down
19 changes: 17 additions & 2 deletions src/PartitionLoops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -980,12 +980,27 @@ class CollapseSelects : public IRMutator {
}
};

class ContainsLoop : public IRVisitor {
class ContainsHotLoop : public IRVisitor {
using IRVisitor::visit;
// Any For node counts as a loop: record that one was found and stop
// here, without descending into the loop body.
void visit(const For *op) override {
result = true;
}

void visit(const IfThenElse *op) override {
op->then_case.accept(this);

// Don't count loops that appear in cold paths
const Call *c = op->condition.as<Call>();
bool else_case_is_cold =
(c &&
(c->is_intrinsic(Call::likely_if_innermost) ||
c->is_intrinsic(Call::likely)));
if (op->else_case.defined() &&
!else_case_is_cold) {
op->else_case.accept(this);
}
}

public:
bool result = false;
};
Expand All @@ -1009,7 +1024,7 @@ class LowerLikelyIfInnermost : public IRMutator {
}

Stmt visit(const For *op) override {
ContainsLoop c;
ContainsHotLoop c;
op->body.accept(&c);
inside_innermost_loop = !c.result;
Stmt stmt = IRMutator::visit(op);
Expand Down
5 changes: 3 additions & 2 deletions src/Solve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -417,9 +417,10 @@ class SolveExpression : public IRMutator {
}

Expr visit(const Call *op) override {
// Ignore likely intrinsics
// Ignore intrinsics that shouldn't affect the results.
if (op->is_intrinsic(Call::likely) ||
op->is_intrinsic(Call::likely_if_innermost)) {
op->is_intrinsic(Call::likely_if_innermost) ||
op->is_intrinsic(Call::promise_clamped)) {
return mutate(op->args[0]);
} else {
return IRMutator::visit(op);
Expand Down
8 changes: 8 additions & 0 deletions src/TrimNoOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,10 @@ class IsNoOp : public IRVisitor {
IRVisitor::visit(op);
}

// An Acquire sets the no-op condition to constant false and does not
// visit the node's children. A constant-false condition is what the
// loop-trimming code in this file reads as "definitely needed".
void visit(const Acquire *op) override {
condition = const_false();
}

template<typename LetOrLetStmt>
void visit_let(const LetOrLetStmt *op) {
IRVisitor::visit(op);
Expand Down Expand Up @@ -371,6 +375,8 @@ class TrimNoOps : public IRMutator {

if (is_const_one(is_no_op.condition)) {
// This loop is definitely useless
debug(3) << "Removed empty loop.\n"
<< "Old: " << Stmt(op) << "\n";
return Evaluate::make(0);
} else if (is_const_zero(is_no_op.condition)) {
// This loop is definitely needed
Expand All @@ -391,6 +397,8 @@ class TrimNoOps : public IRMutator {

if (i.is_empty()) {
// Empty loop
debug(3) << "Removed empty loop.\n"
<< "Old: " << Stmt(op) << "\n";
return Evaluate::make(0);
}

Expand Down
2 changes: 1 addition & 1 deletion src/UniquifyVariableNames.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ void uniquify_variable_names_test() {
{{x, Let::make(y.name(), 3, y)},
{x_1, Let::make(y.name(), 4, y)}});

std::cout << "is_monotonic test passed" << std::endl;
std::cout << "uniquify_variable_names test passed" << std::endl;
}

} // namespace Internal
Expand Down
79 changes: 56 additions & 23 deletions src/VectorizeLoops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,86 +30,87 @@ Expr get_lane(const Expr &e, int l) {

/** Find the exact max and min lanes of a vector expression. Not
* conservative like bounds_of_expr, but uses similar rules for some
* common node types where it can be exact. */
Interval bounds_of_lanes(const Expr &e) {
* common node types where it can be exact. If e is a nested vector,
* the result will be the bounds of the vectors in each lane. */
Interval bounds_of_nested_lanes(const Expr &e) {
if (const Add *add = e.as<Add>()) {
if (const Broadcast *b = add->b.as<Broadcast>()) {
Interval ia = bounds_of_lanes(add->a);
Interval ia = bounds_of_nested_lanes(add->a);
return {ia.min + b->value, ia.max + b->value};
} else if (const Broadcast *b = add->a.as<Broadcast>()) {
Interval ia = bounds_of_lanes(add->b);
Interval ia = bounds_of_nested_lanes(add->b);
return {b->value + ia.min, b->value + ia.max};
}
} else if (const Sub *sub = e.as<Sub>()) {
if (const Broadcast *b = sub->b.as<Broadcast>()) {
Interval ia = bounds_of_lanes(sub->a);
Interval ia = bounds_of_nested_lanes(sub->a);
return {ia.min - b->value, ia.max - b->value};
} else if (const Broadcast *b = sub->a.as<Broadcast>()) {
Interval ia = bounds_of_lanes(sub->b);
Interval ia = bounds_of_nested_lanes(sub->b);
return {b->value - ia.max, b->value - ia.max};
}
} else if (const Mul *mul = e.as<Mul>()) {
if (const Broadcast *b = mul->b.as<Broadcast>()) {
if (is_positive_const(b->value)) {
Interval ia = bounds_of_lanes(mul->a);
Interval ia = bounds_of_nested_lanes(mul->a);
return {ia.min * b->value, ia.max * b->value};
} else if (is_negative_const(b->value)) {
Interval ia = bounds_of_lanes(mul->a);
Interval ia = bounds_of_nested_lanes(mul->a);
return {ia.max * b->value, ia.min * b->value};
}
} else if (const Broadcast *b = mul->a.as<Broadcast>()) {
if (is_positive_const(b->value)) {
Interval ia = bounds_of_lanes(mul->b);
Interval ia = bounds_of_nested_lanes(mul->b);
return {b->value * ia.min, b->value * ia.max};
} else if (is_negative_const(b->value)) {
Interval ia = bounds_of_lanes(mul->b);
Interval ia = bounds_of_nested_lanes(mul->b);
return {b->value * ia.max, b->value * ia.min};
}
}
} else if (const Div *div = e.as<Div>()) {
if (const Broadcast *b = div->b.as<Broadcast>()) {
if (is_positive_const(b->value)) {
Interval ia = bounds_of_lanes(div->a);
Interval ia = bounds_of_nested_lanes(div->a);
return {ia.min / b->value, ia.max / b->value};
} else if (is_negative_const(b->value)) {
Interval ia = bounds_of_lanes(div->a);
Interval ia = bounds_of_nested_lanes(div->a);
return {ia.max / b->value, ia.min / b->value};
}
}
} else if (const And *and_ = e.as<And>()) {
if (const Broadcast *b = and_->b.as<Broadcast>()) {
Interval ia = bounds_of_lanes(and_->a);
Interval ia = bounds_of_nested_lanes(and_->a);
return {ia.min && b->value, ia.max && b->value};
} else if (const Broadcast *b = and_->a.as<Broadcast>()) {
Interval ia = bounds_of_lanes(and_->b);
Interval ia = bounds_of_nested_lanes(and_->b);
return {ia.min && b->value, ia.max && b->value};
}
} else if (const Or *or_ = e.as<Or>()) {
if (const Broadcast *b = or_->b.as<Broadcast>()) {
Interval ia = bounds_of_lanes(or_->a);
Interval ia = bounds_of_nested_lanes(or_->a);
return {ia.min && b->value, ia.max && b->value};
} else if (const Broadcast *b = or_->a.as<Broadcast>()) {
Interval ia = bounds_of_lanes(or_->b);
Interval ia = bounds_of_nested_lanes(or_->b);
return {ia.min && b->value, ia.max && b->value};
}
} else if (const Min *min = e.as<Min>()) {
if (const Broadcast *b = min->b.as<Broadcast>()) {
Interval ia = bounds_of_lanes(min->a);
Interval ia = bounds_of_nested_lanes(min->a);
return {Min::make(ia.min, b->value), Min::make(ia.max, b->value)};
} else if (const Broadcast *b = min->a.as<Broadcast>()) {
Interval ia = bounds_of_lanes(min->b);
Interval ia = bounds_of_nested_lanes(min->b);
return {Min::make(ia.min, b->value), Min::make(ia.max, b->value)};
}
} else if (const Max *max = e.as<Max>()) {
if (const Broadcast *b = max->b.as<Broadcast>()) {
Interval ia = bounds_of_lanes(max->a);
Interval ia = bounds_of_nested_lanes(max->a);
return {Max::make(ia.min, b->value), Max::make(ia.max, b->value)};
} else if (const Broadcast *b = max->a.as<Broadcast>()) {
Interval ia = bounds_of_lanes(max->b);
Interval ia = bounds_of_nested_lanes(max->b);
return {Max::make(ia.min, b->value), Max::make(ia.max, b->value)};
}
} else if (const Not *not_ = e.as<Not>()) {
Interval ia = bounds_of_lanes(not_->a);
Interval ia = bounds_of_nested_lanes(not_->a);
return {!ia.max, !ia.min};
} else if (const Ramp *r = e.as<Ramp>()) {
Expr last_lane_idx = make_const(r->base.type(), r->lanes - 1);
Expand All @@ -118,11 +119,30 @@ Interval bounds_of_lanes(const Expr &e) {
} else if (is_negative_const(r->stride)) {
return {r->base + last_lane_idx * r->stride, r->base};
}
} else if (const LE *le = e.as<LE>()) {
// The least true this can be is if we maximize the LHS and minimize the RHS.
// The most true this can be is if we minimize the LHS and maximize the RHS.
// This is only exact if one of the two sides is a Broadcast.
Interval ia = bounds_of_nested_lanes(le->a);
Interval ib = bounds_of_nested_lanes(le->b);
if (ia.is_single_point() || ib.is_single_point()) {
return {ia.max <= ib.min, ia.min <= ib.max};
}
} else if (const LT *lt = e.as<LT>()) {
// The least true this can be is if we maximize the LHS and minimize the RHS.
// The most true this can be is if we minimize the LHS and maximize the RHS.
// This is only exact if one of the two sides is a Broadcast.
Interval ia = bounds_of_nested_lanes(lt->a);
Interval ib = bounds_of_nested_lanes(lt->b);
if (ia.is_single_point() || ib.is_single_point()) {
return {ia.max < ib.min, ia.min < ib.max};
}

} else if (const Broadcast *b = e.as<Broadcast>()) {
return {b->value, b->value};
} else if (const Let *let = e.as<Let>()) {
Interval ia = bounds_of_lanes(let->value);
Interval ib = bounds_of_lanes(let->body);
Interval ia = bounds_of_nested_lanes(let->value);
Interval ib = bounds_of_nested_lanes(let->body);
if (expr_uses_var(ib.min, let->name)) {
ib.min = Let::make(let->name, let->value, ib.min);
}
Expand All @@ -145,6 +165,19 @@ Interval bounds_of_lanes(const Expr &e) {
}
};

/** Scalar bounds over the lanes of a vector expression. This wraps
 * bounds_of_nested_lanes: whenever an end of the interval it returns
 * is still a vector (i.e. e was a nested vector), that end is reduced
 * again, recursively, until both ends are scalars. */
Interval bounds_of_lanes(const Expr &e) {
    Interval result = bounds_of_nested_lanes(e);

    // Lower end: if it is still a vector, keep only its smallest lane.
    const bool min_is_vector = !result.min.type().is_scalar();
    if (min_is_vector) {
        result.min = bounds_of_lanes(result.min).min;
    }

    // Upper end: if it is still a vector, keep only its largest lane.
    const bool max_is_vector = !result.max.type().is_scalar();
    if (max_is_vector) {
        result.max = bounds_of_lanes(result.max).max;
    }

    return result;
}

// A ramp with the lanes repeated inner_repetitions times, and then
// the whole vector repeated outer_repetitions times.
// E.g: <0 0 2 2 4 4 6 6 0 0 2 2 4 4 6 6>.
Expand Down