diff --git a/python_bindings/apps/interpolate_generator.py b/python_bindings/apps/interpolate_generator.py index c4d9e747c8d1..6797219b8cb2 100644 --- a/python_bindings/apps/interpolate_generator.py +++ b/python_bindings/apps/interpolate_generator.py @@ -89,7 +89,7 @@ def generate(self): pass elif g.target().has_gpu_feature(): # 0.86ms on a 2060 RTX - yo, yi, xo, xi, ci, xii, yii = hl.vars("yo yi xo xi ci xii yii") + yo, yi, xo, xi, xii, yii = hl.vars("yo yi xo xi xii yii") ( g.output_buf.bound(x, 0, g.input_buf.width()) diff --git a/python_bindings/test/correctness/basics.py b/python_bindings/test/correctness/basics.py index d8c88c839848..03d9d86220bb 100644 --- a/python_bindings/test/correctness/basics.py +++ b/python_bindings/test/correctness/basics.py @@ -286,8 +286,8 @@ def test_int_promotion(): def test_vector_tile(): # Test Func.tile() and Stage.tile() with vector arguments x, y, z = [hl.Var(c) for c in "xyz"] - xi, yi, zi = [hl.Var(c + "i") for c in "xyz"] - xo, yo, zo = [hl.Var(c + "o") for c in "xyz"] + xi, yi = hl.vars("xi yi") + xo, yo = hl.vars("xo yo") f = hl.Func("f") g = hl.Func("g") hl.Func("h") diff --git a/src/ImageParam.cpp b/src/ImageParam.cpp index ee321a7c3b8a..7dce672e14e4 100644 --- a/src/ImageParam.cpp +++ b/src/ImageParam.cpp @@ -85,13 +85,11 @@ Func ImageParam::in() { } void ImageParam::trace_loads() { - internal_assert(func.defined()); - func.trace_loads(); + param.trace_loads(); } ImageParam &ImageParam::add_trace_tag(const std::string &trace_tag) { - internal_assert(func.defined()); - func.add_trace_tag(trace_tag); + param.add_trace_tag(trace_tag); return *this; } diff --git a/src/Parameter.cpp b/src/Parameter.cpp index 59416f859050..722929185e67 100644 --- a/src/Parameter.cpp +++ b/src/Parameter.cpp @@ -21,6 +21,8 @@ struct ParameterContents { Expr scalar_default, scalar_min, scalar_max, scalar_estimate; const bool is_buffer; MemoryType memory_type = MemoryType::Auto; + bool trace_loads{false}; + std::vector trace_tags; ParameterContents(Type t, bool b, int d, const std::string &n) : type(t), dimensions(d), name(n), buffer(Buffer<>()), @@ -494,6 +496,22 @@ MemoryType Parameter::memory_type() const { return contents->memory_type; } +void Parameter::trace_loads() { + contents->trace_loads = true; +} + +bool Parameter::is_tracing_loads() const { + return contents->trace_loads; +} + +void Parameter::add_trace_tag(const std::string &trace_tag) { + contents->trace_tags.push_back(trace_tag); +} + +std::vector Parameter::get_trace_tags() const { + return contents->trace_tags; +} + namespace Internal { void check_call_arg_types(const std::string &name, std::vector *args, int dims) { diff --git a/src/Parameter.h b/src/Parameter.h index 09840683980c..56a441f5ba35 100644 --- a/src/Parameter.h +++ b/src/Parameter.h @@ -219,6 +219,12 @@ class Parameter { void store_in(MemoryType memory_type); MemoryType memory_type() const; + + void trace_loads(); + bool is_tracing_loads() const; + + void add_trace_tag(const std::string &trace_tag); + std::vector get_trace_tags() const; }; namespace Internal { diff --git a/src/Tracing.cpp b/src/Tracing.cpp index 2b509e0e55d0..f44b1f1b11c4 100644 --- a/src/Tracing.cpp +++ b/src/Tracing.cpp @@ -6,14 +6,14 @@ #include "RealizationOrder.h" #include "runtime/HalideRuntime.h" -#include +#include +#include namespace Halide { namespace Internal { using std::map; using std::pair; -using std::set; using std::string; using std::vector; @@ -25,7 +25,7 @@ struct TraceEventBuilder { vector value; vector coordinates; Type type; - enum halide_trace_event_code_t event; + halide_trace_event_code_t event; Expr parent_id, value_index; Expr build() { @@ -57,10 +57,11 @@ class InjectTracing : public IRMutator { const bool trace_all_loads, trace_all_stores, trace_all_realizations; // We want to preserve the order, so use a vector rather than a map vector>> trace_tags; - set trace_tags_added; + std::unordered_set trace_tags_added; // The funcs that will have any tracing info emitted (not just trace tags), // and the Type(s) of their elements. map> funcs_touched; + map> images_touched; InjectTracing(const map &e, const Target &t) : env(e), @@ -79,12 +80,12 @@ class InjectTracing : public IRMutator { } } - void add_func_touched(const string &name, int value_index, const Type &type) { - auto it = funcs_touched.find(name); - if (it == funcs_touched.end()) { + void touch(map> &touched, const string &name, int value_index, const Type &type) { + auto it = touched.find(name); + if (it == touched.end()) { vector types(value_index + 1); types[value_index] = type; - funcs_touched[name] = types; + touched[name] = types; } else { // If the type already present is missing, or "handle0" (aka "we don't know yet", // replace it with the given type. Otherwise, assert the types match. @@ -114,36 +115,26 @@ class InjectTracing : public IRMutator { Function f = it->second; internal_assert(!f.can_be_inlined() || !f.schedule().compute_level().is_inlined()); - trace_it = f.is_tracing_loads() || trace_all_loads; + trace_it = trace_all_loads || f.is_tracing_loads(); trace_parent = Variable::make(Int(32), op->name + ".trace_id"); if (trace_it) { add_trace_tags(op->name, f.get_trace_tags()); + touch(funcs_touched, op->name, op->value_index, op->type); } } else if (op->call_type == Call::Image) { - trace_it = trace_all_loads; - // If there is a Function in the env named "name_im", assume that - // this image is an ImageParam, so sniff that Function to see - // if we want to trace loads on it. (This allows us to trace - // loads on inputs without having to enable them globally.) - auto it = env.find(op->name + "_im"); - if (it != env.end()) { - Function f = it->second; - // f could be scheduled and have actual loads from it (via ImageParam::in), - // so only honor trace the loads if it is inlined. - if ((f.is_tracing_loads() || trace_all_loads) && - f.can_be_inlined() && - f.schedule().compute_level().is_inlined()) { - trace_it = true; - add_trace_tags(op->name, f.get_trace_tags()); + // op->param is defined when we're loading from an ImageParam, and undefined + // when we're loading from an inlined Buffer. + trace_it = trace_all_loads || (op->param.defined() && op->param.is_tracing_loads()); + trace_parent = Variable::make(Int(32), "pipeline.trace_id"); + if (trace_it) { + if (op->param.defined()) { + add_trace_tags(op->name, op->param.get_trace_tags()); } + touch(images_touched, op->name, op->value_index, op->type); } - - trace_parent = Variable::make(Int(32), "pipeline.trace_id"); } if (trace_it) { - add_func_touched(op->name, op->value_index, op->type); - string value_var_name = unique_name('t'); Expr value_var = Variable::make(op->type, value_var_name); @@ -189,7 +180,7 @@ class InjectTracing : public IRMutator { builder.parent_id = Variable::make(Int(32), op->name + ".trace_id"); for (size_t i = 0; i < values.size(); i++) { Type t = values[i].type(); - add_func_touched(f.name(), (int)i, t); + touch(funcs_touched, f.name(), (int)i, t); string value_var_name = unique_name('t'); Expr value_var = Variable::make(t, value_var_name); @@ -241,7 +232,7 @@ class InjectTracing : public IRMutator { if (f.is_tracing_realizations() || trace_all_realizations) { add_trace_tags(op->name, f.get_trace_tags()); for (size_t i = 0; i < op->types.size(); i++) { - add_func_touched(op->name, i, op->types[i]); + touch(funcs_touched, op->name, i, op->types[i]); } // Throw a tracing call before and after the realize body @@ -381,12 +372,13 @@ Stmt inject_tracing(Stmt s, const string &pipeline_name, bool trace_pipeline, // For a given realization/input/output, we output them in the order // we encounter them (which is to say, the order they were added); however, // we don't attempt to preserve a particular order between functions. - for (const auto &trace_tags : tracing.trace_tags) { + builder.event = halide_trace_tag; + + for (const auto &[func, tags] : tracing.trace_tags) { // builder.parent_id is already set correctly - builder.func = trace_tags.first; // func name - builder.event = halide_trace_tag; + builder.func = func; // We must reverse-iterate to preserve order - for (const auto &tag : reverse_view(trace_tags.second)) { + for (const auto &tag : reverse_view(tags)) { user_assert(tag.find('\0') == string::npos) << "add_trace_tag() may not contain the null character."; builder.trace_tag_expr = Expr(tag); @@ -394,74 +386,80 @@ Stmt inject_tracing(Stmt s, const string &pipeline_name, bool trace_pipeline, } } - builder.event = halide_trace_tag; - - vector order = topological_order(outputs, env); - // Compute boxes_touched and send a func_type_and_dim trace-tag for // everything that we actually touched, in topological order. // We include the type(s) of each Func (could be multiple for Tuple-valued - // Funcs), and the dimensions and guess-at-ranges-rouched. Note that the + // Funcs), and the dimensions and guess-at-ranges-touched. Note that the // dimensions should be exact, but the ranges-touched is a conservative estimate; // that's ok, as we just want to send these as rough guesses for a tracing tool to use for // automatic layout. (Note that we deliberately send these // before any user-specified trace-tags.) - Expr space = Expr(" "); - + vector order = topological_order(outputs, env); std::map bt = boxes_touched(s); - for (const auto &o : reverse_view(order)) { - auto p = tracing.funcs_touched.find(o); - if (p == tracing.funcs_touched.end() && ends_with(o, "_im")) { - p = tracing.funcs_touched.find(o.substr(0, o.size() - 3)); - } - if (p == tracing.funcs_touched.end()) { - continue; - } - const string &func_name = p->first; - const vector &func_types = p->second; - builder.func = func_name; - - vector strings; - strings.emplace_back("func_type_and_dim:"); - strings.push_back(space); - strings.emplace_back((int)func_types.size()); - for (const auto &func_type : func_types) { - strings.push_back(space); - strings.emplace_back((int)func_type.code()); - strings.push_back(space); - strings.emplace_back(func_type.bits()); + + const auto add_func_type_and_dim = + [&builder, &bt]( + Stmt s, + const string &func_name, + const vector &func_types // + ) { + Expr space = Expr(" "); + + vector strings; + strings.emplace_back("func_type_and_dim:"); strings.push_back(space); - strings.emplace_back(func_type.lanes()); - } - auto it = bt.find(func_name); - internal_assert(it != bt.end()); - const Box &box = it->second; - strings.push_back(space); - strings.emplace_back((int)box.bounds.size()); - for (const Interval &i : box.bounds) { - internal_assert(i.min.defined() && i.max.defined()); - if (i.is_bounded()) { - strings.push_back(space); - strings.push_back(i.min); + strings.emplace_back((int)func_types.size()); + for (const auto &func_type : func_types) { strings.push_back(space); - // Emit as (min, extent) rather than (min, max) - strings.push_back(i.max - i.min + Expr(1)); - } else { - // This should really only happen for unusual cases - // that we won't end up realizing, so we can just - // use any numeric values. + strings.emplace_back((int)func_type.code()); strings.push_back(space); - strings.emplace_back(0); + strings.emplace_back(func_type.bits()); strings.push_back(space); - strings.emplace_back(0); + strings.emplace_back(func_type.lanes()); } + auto it = bt.find(func_name); + internal_assert(it != bt.end()); + const Box &box = it->second; + strings.push_back(space); + strings.emplace_back((int)box.bounds.size()); + for (const Interval &i : box.bounds) { + internal_assert(i.min.defined() && i.max.defined()); + if (i.is_bounded()) { + strings.push_back(space); + strings.push_back(i.min); + strings.push_back(space); + // Emit as (min, extent) rather than (min, max) + strings.push_back(i.max - i.min + Expr(1)); + } else { + // This should really only happen for unusual cases + // that we won't end up realizing, so we can just + // use any numeric values. + strings.push_back(space); + strings.emplace_back(0); + strings.push_back(space); + strings.emplace_back(0); + } + } + + builder.func = func_name; + builder.trace_tag_expr = + Call::make(type_of(), Call::stringify, strings, Call::PureIntrinsic); + return Block::make(Evaluate::make(builder.build()), std::move(s)); + }; + + for (const auto &o : reverse_view(order)) { + if (auto p = tracing.funcs_touched.find(o); p != tracing.funcs_touched.end()) { + const auto &[func_name, func_types] = *p; + s = add_func_type_and_dim(s, func_name, func_types); } - builder.trace_tag_expr = - Internal::Call::make(type_of(), - Internal::Call::stringify, - strings, - Internal::Call::PureIntrinsic); - s = Block::make(Evaluate::make(builder.build()), s); + } + + // Add the ImageParams outermost (these aren't in env, so won't be in the + // topological order computed above). Fortunately, as params, they all have + // in-degree zero, so they can come first in the list in any order with respect + // to one another. + for (const auto &[im_name, im_types] : tracing.images_touched) { + s = add_func_type_and_dim(s, im_name, im_types); } s = LetStmt::make("pipeline.trace_id", pipeline_start, s); diff --git a/test/correctness/tracing.cpp b/test/correctness/tracing.cpp index 7c4f0d8e5fca..27fa4a3f6e4f 100644 --- a/test/correctness/tracing.cpp +++ b/test/correctness/tracing.cpp @@ -227,8 +227,8 @@ int main(int argc, char **argv) { // The golden trace, recorded when this test was written event correct_trace[] = { {102, 0, 8, 3, 0, 0, 0, 0, {0, 0, 0, 0}, {0.000000f, 0.000000f, 0.000000f, 0.000000f}, ""}, - {103, 1, 10, 3, 0, 0, 0, 0, {0, 0, 0, 0}, {0.000000f, 0.000000f, 0.000000f, 0.000000f}, "func_type_and_dim: 2 2 32 1 2 32 1 1 0 11"}, {105, 1, 10, 3, 0, 0, 0, 0, {0, 0, 0, 0}, {0.000000f, 0.000000f, 0.000000f, 0.000000f}, "func_type_and_dim: 1 2 32 1 1 0 10"}, + {103, 1, 10, 3, 0, 0, 0, 0, {0, 0, 0, 0}, {0.000000f, 0.000000f, 0.000000f, 0.000000f}, "func_type_and_dim: 2 2 32 1 2 32 1 1 0 11"}, {102, 1, 10, 3, 0, 0, 0, 0, {0, 0, 0, 0}, {0.000000f, 0.000000f, 0.000000f, 0.000000f}, "func_type_and_dim: 1 2 32 1 1 0 10"}, {102, 1, 10, 3, 0, 0, 0, 0, {0, 0, 0, 0}, {0.000000f, 0.000000f, 0.000000f, 0.000000f}, "arbitrary data on f"}, {102, 1, 10, 3, 0, 0, 0, 0, {0, 0, 0, 0}, {0.000000f, 0.000000f, 0.000000f, 0.000000f}, "more:arbitrary \xff data on f?"},