From c8f47457c36b4b088c81946f1a0e4e9ee8adf9b5 Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Thu, 25 Feb 2021 13:48:33 -0700 Subject: [PATCH 01/10] Fix bug when a semaphore is cloned more than once. --- src/AsyncProducers.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/AsyncProducers.cpp b/src/AsyncProducers.cpp index 40ae31c8021f..c064e7f2fac2 100644 --- a/src/AsyncProducers.cpp +++ b/src/AsyncProducers.cpp @@ -172,9 +172,9 @@ class GenerateProducerBody : public NoOpCollapsingMutator { } else { // This semaphore will end up on both sides of the fork, // so we'd better duplicate it. - string cloned_acquire = var->name + unique_name('_'); - cloned_acquires[var->name] = cloned_acquire; - return Acquire::make(Variable::make(type_of(), cloned_acquire), op->count, body); + vector &clones = cloned_acquires[var->name]; + clones.push_back(var->name + unique_name('_')); + return Acquire::make(Variable::make(type_of(), clones.back()), op->count, body); } } @@ -192,11 +192,11 @@ class GenerateProducerBody : public NoOpCollapsingMutator { return op; } - map &cloned_acquires; + map> &cloned_acquires; set inner_semaphores; public: - GenerateProducerBody(const string &f, const vector &s, map &a) + GenerateProducerBody(const string &f, const vector &s, map> &a) : func(f), sema(s), cloned_acquires(a) { } }; @@ -311,7 +311,7 @@ class ForkAsyncProducers : public IRMutator { const map &env; - map cloned_acquires; + map> cloned_acquires; Stmt visit(const Realize *op) override { auto it = env.find(op->name); @@ -354,10 +354,10 @@ class ForkAsyncProducers : public IRMutator { // If there's a nested async producer, we may have // recursively cloned this semaphore inside the mutation // of the producer and consumer. - auto it = cloned_acquires.find(sema_name); - if (it != cloned_acquires.end()) { - body = CloneAcquire(sema_name, it->second).mutate(body); - body = LetStmt::make(it->second, sema_space, body); + const vector &clones = cloned_acquires[sema_name]; + for (const auto &i : clones) { + body = CloneAcquire(sema_name, i).mutate(body); + body = LetStmt::make(i, sema_space, body); } body = LetStmt::make(sema_name, sema_space, body); From 84c1cdd7ec7d26fbc1314a646f50ec4dd23c4aa3 Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Thu, 25 Feb 2021 13:51:37 -0700 Subject: [PATCH 02/10] (Originally by abadams) Don't count unlikely loops as inner loops for likely_if_innermost. --- src/PartitionLoops.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/PartitionLoops.cpp b/src/PartitionLoops.cpp index 0a5381972000..c1e0d1fb7bfb 100644 --- a/src/PartitionLoops.cpp +++ b/src/PartitionLoops.cpp @@ -980,12 +980,27 @@ class CollapseSelects : public IRMutator { } }; -class ContainsLoop : public IRVisitor { +class ContainsHotLoop : public IRVisitor { using IRVisitor::visit; void visit(const For *op) override { result = true; } + void visit(const IfThenElse *op) override { + op->then_case.accept(this); + + // Don't count loops that appear in cold paths + const Call *c = op->condition.as(); + bool else_case_is_cold = + (c && + (c->is_intrinsic(Call::likely_if_innermost) || + c->is_intrinsic(Call::likely))); + if (op->else_case.defined() && + !else_case_is_cold) { + op->else_case.accept(this); + } + } + public: bool result = false; }; @@ -1009,7 +1024,7 @@ class LowerLikelyIfInnermost : public IRMutator { } Stmt visit(const For *op) override { - ContainsLoop c; + ContainsHotLoop c; op->body.accept(&c); inside_innermost_loop = !c.result; Stmt stmt = IRMutator::visit(op); From 01608a417ffeb5cab72e4d464bb57c0f38e2d421 Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Thu, 25 Feb 2021 13:52:10 -0700 Subject: [PATCH 03/10] Ignore promise_clamped when solving. --- src/Solve.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Solve.cpp b/src/Solve.cpp index 553e3a91fa30..b5e381436e4b 100644 --- a/src/Solve.cpp +++ b/src/Solve.cpp @@ -417,9 +417,10 @@ class SolveExpression : public IRMutator { } Expr visit(const Call *op) override { - // Ignore likely intrinsics + // Ignore intrinsics that shouldn't affect the results. if (op->is_intrinsic(Call::likely) || - op->is_intrinsic(Call::likely_if_innermost)) { + op->is_intrinsic(Call::likely_if_innermost) || + op->is_intrinsic(Call::promise_clamped)) { return mutate(op->args[0]); } else { return IRMutator::visit(op); From 2b2470834ede2bc2b0b13ec7e6f6eea2e15dd880 Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Thu, 25 Feb 2021 13:52:32 -0700 Subject: [PATCH 04/10] Acquires are not no-ops. --- src/TrimNoOps.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/TrimNoOps.cpp b/src/TrimNoOps.cpp index 059a6236f35b..459ea62fbb90 100644 --- a/src/TrimNoOps.cpp +++ b/src/TrimNoOps.cpp @@ -163,6 +163,10 @@ class IsNoOp : public IRVisitor { IRVisitor::visit(op); } + void visit(const Acquire *op) override { + condition = const_false(); + } + template void visit_let(const LetOrLetStmt *op) { IRVisitor::visit(op); @@ -371,6 +375,8 @@ class TrimNoOps : public IRMutator { if (is_const_one(is_no_op.condition)) { // This loop is definitely useless + debug(3) << "Removed empty loop.\n" + << "Old: " << Stmt(op) << "\n"; return Evaluate::make(0); } else if (is_const_zero(is_no_op.condition)) { // This loop is definitely needed @@ -391,6 +397,8 @@ class TrimNoOps : public IRMutator { if (i.is_empty()) { // Empty loop + debug(3) << "Removed empty loop.\n" + << "Old: " << Stmt(op) << "\n"; return Evaluate::make(0); } From 9dba2fa05546b13782c3d4db10c340c12b22bb60 Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Thu, 25 Feb 2021 13:52:50 -0700 Subject: [PATCH 05/10] Fix test name --- src/UniquifyVariableNames.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/UniquifyVariableNames.cpp b/src/UniquifyVariableNames.cpp index 10483a823bcc..800fe4723a4f 100644 --- a/src/UniquifyVariableNames.cpp +++ b/src/UniquifyVariableNames.cpp @@ -243,7 +243,7 @@ void uniquify_variable_names_test() { {{x, Let::make(y.name(), 3, y)}, {x_1, Let::make(y.name(), 4, y)}}); - std::cout << "is_monotonic test passed" << std::endl; + std::cout << "uniquify_variable_names_test test passed" << std::endl; } } // namespace Internal From 44fa177b54cd5337cbbec62020a7c9407306abe8 Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Thu, 25 Feb 2021 13:53:29 -0700 Subject: [PATCH 06/10] Handle nested vectors in bounds_of_lanes and (by abadams) Handle LE/LT in bounds of lanes in vectorize --- src/VectorizeLoops.cpp | 79 ++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/src/VectorizeLoops.cpp b/src/VectorizeLoops.cpp index ac6ca2d495ec..b39acd343d5f 100644 --- a/src/VectorizeLoops.cpp +++ b/src/VectorizeLoops.cpp @@ -30,86 +30,87 @@ Expr get_lane(const Expr &e, int l) { /** Find the exact max and min lanes of a vector expression. Not * conservative like bounds_of_expr, but uses similar rules for some - * common node types where it can be exact. */ -Interval bounds_of_lanes(const Expr &e) { + * common node types where it can be exact. If e is a nested vector, + * the result will be the bounds of the vectors in each lane. */ +Interval bounds_of_nested_lanes(const Expr &e) { if (const Add *add = e.as()) { if (const Broadcast *b = add->b.as()) { - Interval ia = bounds_of_lanes(add->a); + Interval ia = bounds_of_nested_lanes(add->a); return {ia.min + b->value, ia.max + b->value}; } else if (const Broadcast *b = add->a.as()) { - Interval ia = bounds_of_lanes(add->b); + Interval ia = bounds_of_nested_lanes(add->b); return {b->value + ia.min, b->value + ia.max}; } } else if (const Sub *sub = e.as()) { if (const Broadcast *b = sub->b.as()) { - Interval ia = bounds_of_lanes(sub->a); + Interval ia = bounds_of_nested_lanes(sub->a); return {ia.min - b->value, ia.max - b->value}; } else if (const Broadcast *b = sub->a.as()) { - Interval ia = bounds_of_lanes(sub->b); + Interval ia = bounds_of_nested_lanes(sub->b); return {b->value - ia.max, b->value - ia.max}; } } else if (const Mul *mul = e.as()) { if (const Broadcast *b = mul->b.as()) { if (is_positive_const(b->value)) { - Interval ia = bounds_of_lanes(mul->a); + Interval ia = bounds_of_nested_lanes(mul->a); return {ia.min * b->value, ia.max * b->value}; } else if (is_negative_const(b->value)) { - Interval ia = bounds_of_lanes(mul->a); + Interval ia = bounds_of_nested_lanes(mul->a); return {ia.max * b->value, ia.min * b->value}; } } else if (const Broadcast *b = mul->a.as()) { if (is_positive_const(b->value)) { - Interval ia = bounds_of_lanes(mul->b); + Interval ia = bounds_of_nested_lanes(mul->b); return {b->value * ia.min, b->value * ia.max}; } else if (is_negative_const(b->value)) { - Interval ia = bounds_of_lanes(mul->b); + Interval ia = bounds_of_nested_lanes(mul->b); return {b->value * ia.max, b->value * ia.min}; } } } else if (const Div *div = e.as
()) { if (const Broadcast *b = div->b.as()) { if (is_positive_const(b->value)) { - Interval ia = bounds_of_lanes(div->a); + Interval ia = bounds_of_nested_lanes(div->a); return {ia.min / b->value, ia.max / b->value}; } else if (is_negative_const(b->value)) { - Interval ia = bounds_of_lanes(div->a); + Interval ia = bounds_of_nested_lanes(div->a); return {ia.max / b->value, ia.min / b->value}; } } } else if (const And *and_ = e.as()) { if (const Broadcast *b = and_->b.as()) { - Interval ia = bounds_of_lanes(and_->a); + Interval ia = bounds_of_nested_lanes(and_->a); return {ia.min && b->value, ia.max && b->value}; } else if (const Broadcast *b = and_->a.as()) { - Interval ia = bounds_of_lanes(and_->b); + Interval ia = bounds_of_nested_lanes(and_->b); return {ia.min && b->value, ia.max && b->value}; } } else if (const Or *or_ = e.as()) { if (const Broadcast *b = or_->b.as()) { - Interval ia = bounds_of_lanes(or_->a); + Interval ia = bounds_of_nested_lanes(or_->a); return {ia.min && b->value, ia.max && b->value}; } else if (const Broadcast *b = or_->a.as()) { - Interval ia = bounds_of_lanes(or_->b); + Interval ia = bounds_of_nested_lanes(or_->b); return {ia.min && b->value, ia.max && b->value}; } } else if (const Min *min = e.as()) { if (const Broadcast *b = min->b.as()) { - Interval ia = bounds_of_lanes(min->a); + Interval ia = bounds_of_nested_lanes(min->a); return {Min::make(ia.min, b->value), Min::make(ia.max, b->value)}; } else if (const Broadcast *b = min->a.as()) { - Interval ia = bounds_of_lanes(min->b); + Interval ia = bounds_of_nested_lanes(min->b); return {Min::make(ia.min, b->value), Min::make(ia.max, b->value)}; } } else if (const Max *max = e.as()) { if (const Broadcast *b = max->b.as()) { - Interval ia = bounds_of_lanes(max->a); + Interval ia = bounds_of_nested_lanes(max->a); return {Max::make(ia.min, b->value), Max::make(ia.max, b->value)}; } else if (const Broadcast *b = max->a.as()) { - Interval ia = bounds_of_lanes(max->b); + Interval ia = bounds_of_nested_lanes(max->b); return {Max::make(ia.min, b->value), Max::make(ia.max, b->value)}; } } else if (const Not *not_ = e.as()) { - Interval ia = bounds_of_lanes(not_->a); + Interval ia = bounds_of_nested_lanes(not_->a); return {!ia.max, !ia.min}; } else if (const Ramp *r = e.as()) { Expr last_lane_idx = make_const(r->base.type(), r->lanes - 1); @@ -118,11 +119,30 @@ Interval bounds_of_lanes(const Expr &e) { } else if (is_negative_const(r->stride)) { return {r->base + last_lane_idx * r->stride, r->base}; } + } else if (const LE *le = e.as()) { + // The least true this can be is if we maximize the LHS and minimize the RHS + // The most true this can be is if we minimize the LHS and maximize the RHS + // This is only exact if one of the two sides is a Broadcast + Interval ia = bounds_of_nested_lanes(le->a); + Interval ib = bounds_of_nested_lanes(le->b); + if (ia.is_single_point() || ib.is_single_point()) { + return {ia.max <= ib.min, ia.min <= ib.max}; + } + } else if (const LT *lt = e.as()) { + // The least true this can be is if we maximize the LHS and minimize the RHS + // The most true this can be is if we minimize the LHS and maximize the RHS + // This is only exact if one of the two sides is a Broadcast + Interval ia = bounds_of_nested_lanes(lt->a); + Interval ib = bounds_of_nested_lanes(lt->b); + if (ia.is_single_point() || ib.is_single_point()) { + return {ia.max < ib.min, ia.min < ib.max}; + } + } else if (const Broadcast *b = e.as()) { return {b->value, b->value}; } else if (const Let *let = e.as()) { - Interval ia = bounds_of_lanes(let->value); - Interval ib = bounds_of_lanes(let->body); + Interval ia = bounds_of_nested_lanes(let->value); + Interval ib = bounds_of_nested_lanes(let->body); if (expr_uses_var(ib.min, let->name)) { ib.min = Let::make(let->name, let->value, ib.min); } @@ -145,6 +165,19 @@ Interval bounds_of_lanes(const Expr &e) { } }; +/** Similar to bounds_of_nested_lanes, but it recursively reduces + * the bounds of nested vectors to scalars. */ +Interval bounds_of_lanes(const Expr &e) { + Interval bounds = bounds_of_nested_lanes(e); + if (!bounds.min.type().is_scalar()) { + bounds.min = bounds_of_nested_lanes(bounds.min).min; + } + if (!bounds.max.type().is_scalar()) { + bounds.max = bounds_of_nested_lanes(bounds.max).max; + } + return bounds; +} + // A ramp with the lanes repeated inner_repetitions times, and then // the whole vector repeated outer_repetitions times. // E.g: <0 0 2 2 4 4 6 6 0 0 2 2 4 4 6 6>. From fb5460434213a8c37074760297ebb6a69d47d84d Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Fri, 26 Feb 2021 16:47:19 -0700 Subject: [PATCH 07/10] Fix test name. --- src/UniquifyVariableNames.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/UniquifyVariableNames.cpp b/src/UniquifyVariableNames.cpp index 800fe4723a4f..34ebe72603ef 100644 --- a/src/UniquifyVariableNames.cpp +++ b/src/UniquifyVariableNames.cpp @@ -243,7 +243,7 @@ void uniquify_variable_names_test() { {{x, Let::make(y.name(), 3, y)}, {x_1, Let::make(y.name(), 4, y)}}); - std::cout << "uniquify_variable_names_test test passed" << std::endl; + std::cout << "uniquify_variable_names test passed" << std::endl; } } // namespace Internal From 5e4559a99c8dc241d8e9abd4c74b71a559021ae9 Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Fri, 26 Feb 2021 16:47:26 -0700 Subject: [PATCH 08/10] Allow any level of nested vectorization. --- src/VectorizeLoops.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/VectorizeLoops.cpp b/src/VectorizeLoops.cpp index b39acd343d5f..05dfb838ba49 100644 --- a/src/VectorizeLoops.cpp +++ b/src/VectorizeLoops.cpp @@ -170,10 +170,10 @@ Interval bounds_of_nested_lanes(const Expr &e) { Interval bounds_of_lanes(const Expr &e) { Interval bounds = bounds_of_nested_lanes(e); if (!bounds.min.type().is_scalar()) { - bounds.min = bounds_of_nested_lanes(bounds.min).min; + bounds.min = bounds_of_lanes(bounds.min).min; } if (!bounds.max.type().is_scalar()) { - bounds.max = bounds_of_nested_lanes(bounds.max).max; + bounds.max = bounds_of_lanes(bounds.max).max; } return bounds; } From 37f601e88e4c2aa5880ccde7643d07f0e3e92e87 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Sun, 28 Feb 2021 09:50:47 -0800 Subject: [PATCH 09/10] trigger buildbots From ca5b83a943951f02228ffed81d4cf3abf4625e8b Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Mon, 1 Mar 2021 13:15:43 -0700 Subject: [PATCH 10/10] Grammar --- src/VectorizeLoops.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/VectorizeLoops.cpp b/src/VectorizeLoops.cpp index 05dfb838ba49..e31126b4949e 100644 --- a/src/VectorizeLoops.cpp +++ b/src/VectorizeLoops.cpp @@ -120,18 +120,18 @@ Interval bounds_of_nested_lanes(const Expr &e) { return {r->base + last_lane_idx * r->stride, r->base}; } } else if (const LE *le = e.as()) { - // The least true this can be is if we maximize the LHS and minimize the RHS - // The most true this can be is if we minimize the LHS and maximize the RHS - // This is only exact if one of the two sides is a Broadcast + // The least true this can be is if we maximize the LHS and minimize the RHS. + // The most true this can be is if we minimize the LHS and maximize the RHS. + // This is only exact if one of the two sides is a Broadcast. Interval ia = bounds_of_nested_lanes(le->a); Interval ib = bounds_of_nested_lanes(le->b); if (ia.is_single_point() || ib.is_single_point()) { return {ia.max <= ib.min, ia.min <= ib.max}; } } else if (const LT *lt = e.as()) { - // The least true this can be is if we maximize the LHS and minimize the RHS - // The most true this can be is if we minimize the LHS and maximize the RHS - // This is only exact if one of the two sides is a Broadcast + // The least true this can be is if we maximize the LHS and minimize the RHS. + // The most true this can be is if we minimize the LHS and maximize the RHS. + // This is only exact if one of the two sides is a Broadcast. Interval ia = bounds_of_nested_lanes(lt->a); Interval ib = bounds_of_nested_lanes(lt->b); if (ia.is_single_point() || ib.is_single_point()) {