From 8b74f72f6af35582f91887ac4090457e57d1e9aa Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Fri, 11 Dec 2020 10:48:04 -0700 Subject: [PATCH 1/3] Hide inaccessible symbols in internal linkage. --- src/AddImageChecks.cpp | 4 + src/AddParameterChecks.cpp | 4 + src/AllocationBoundsInference.cpp | 4 + src/AsyncProducers.cpp | 4 + src/BoundSmallAllocations.cpp | 4 + src/Bounds.cpp | 8 + src/BoundsInference.cpp | 3 +- src/CodeGen_C.cpp | 3 +- src/CodeGen_D3D12Compute_Dev.cpp | 94 +++++++ src/CodeGen_D3D12Compute_Dev.h | 91 +------ src/CodeGen_GPU_Host.cpp | 10 +- src/CodeGen_Hexagon.cpp | 3 + src/CodeGen_Metal_Dev.cpp | 89 +++++++ src/CodeGen_Metal_Dev.h | 86 +------ src/CodeGen_OpenCL_Dev.cpp | 91 +++++++ src/CodeGen_OpenCL_Dev.h | 88 +------ src/CodeGen_OpenGLCompute_Dev.cpp | 69 +++++ src/CodeGen_OpenGLCompute_Dev.h | 68 +---- src/CodeGen_PTX_Dev.cpp | 80 ++++++ src/CodeGen_PTX_Dev.h | 82 +----- src/DebugToFile.cpp | 4 + src/DerivativeUtils.cpp | 28 +++ src/EliminateBoolVectors.cpp | 4 + src/FastIntegerDivide.cpp | 16 +- src/FastIntegerDivide.h | 13 - src/Function.cpp | 6 +- src/FuseGPUThreadLoops.cpp | 8 +- src/IRMatch.cpp | 4 + src/InjectOpenGLIntrinsics.cpp | 4 + src/InlineReductions.cpp | 3 + src/Introspection.cpp | 6 +- src/LICM.cpp | 4 + src/LLVM_Output.cpp | 4 + src/LLVM_Runtime_Linker.cpp | 4 - src/Memoization.cpp | 8 +- src/ModulusRemainder.cpp | 402 +++++++++++++++--------------- src/Monotonic.cpp | 4 + src/PrintLoopNest.cpp | 4 + src/Profiling.cpp | 4 + src/PurifyIndexMath.cpp | 4 + src/PythonExtensionGen.cpp | 10 +- src/Qualify.cpp | 4 + src/Random.cpp | 4 + src/RemoveDeadAllocations.cpp | 4 + src/RemoveExternLoops.cpp | 4 + src/RemoveUndef.cpp | 4 + src/ScheduleFunctions.cpp | 5 +- src/ScheduleFunctions.h | 4 +- src/SelectGPUAPI.cpp | 4 + src/SkipStages.cpp | 4 +- src/SlidingWindow.cpp | 4 +- src/StorageFolding.cpp | 4 +- src/StrictifyFloat.cpp | 4 + src/Substitute.cpp | 16 ++ src/Tracing.cpp | 4 + src/UnifyDuplicateLets.cpp | 4 + src/UnrollLoops.cpp | 4 + src/VaryingAttributes.cpp | 22 +- 58 files changed, 878 insertions(+), 650 deletions(-) diff --git a/src/AddImageChecks.cpp b/src/AddImageChecks.cpp index a2345339b645..b98fd62ff86d 100644 --- a/src/AddImageChecks.cpp +++ b/src/AddImageChecks.cpp @@ -16,6 +16,8 @@ using std::pair; using std::string; using std::vector; +namespace { + /* Find all the externally referenced buffers in a stmt */ class FindBuffers : public IRGraphVisitor { public: @@ -710,6 +712,8 @@ Stmt add_image_checks_inner(Stmt s, return s; } +} // namespace + // The following function repeats the arguments list it just passes // through six times. Surely there is a better way? Stmt add_image_checks(const Stmt &s, diff --git a/src/AddParameterChecks.cpp b/src/AddParameterChecks.cpp index 8706485430c0..e1024dd5caa8 100644 --- a/src/AddParameterChecks.cpp +++ b/src/AddParameterChecks.cpp @@ -12,6 +12,8 @@ using std::pair; using std::string; using std::vector; +namespace { + // Find all the externally referenced scalar parameters class FindParameters : public IRGraphVisitor { public: @@ -26,6 +28,8 @@ class FindParameters : public IRGraphVisitor { } }; +} // namespace + // Insert checks to make sure that parameters are within their // declared range. Stmt add_parameter_checks(const vector &preconditions, Stmt s, const Target &t) { diff --git a/src/AllocationBoundsInference.cpp b/src/AllocationBoundsInference.cpp index 3a3874f1a35e..076cb74f86af 100644 --- a/src/AllocationBoundsInference.cpp +++ b/src/AllocationBoundsInference.cpp @@ -14,6 +14,8 @@ using std::set; using std::string; using std::vector; +namespace { + // Figure out the region touched of each buffer, and deposit them as // let statements outside of each realize node, or at the top level if // they're not internal allocations. @@ -154,6 +156,8 @@ class StripDeclareBoxTouched : public IRMutator { } }; +} // namespace + Stmt allocation_bounds_inference(Stmt s, const map &env, const FuncValueBounds &fb) { diff --git a/src/AsyncProducers.cpp b/src/AsyncProducers.cpp index ff7356409c58..40ae31c8021f 100644 --- a/src/AsyncProducers.cpp +++ b/src/AsyncProducers.cpp @@ -14,6 +14,8 @@ using std::set; using std::string; using std::vector; +namespace { + /** A mutator which eagerly folds no-op stmts */ class NoOpCollapsingMutator : public IRMutator { protected: @@ -651,6 +653,8 @@ class TightenForkNodes : public IRMutator { // TODO: merge semaphores? +} // namespace + Stmt fork_async_producers(Stmt s, const map &env) { s = TightenProducerConsumerNodes(env).mutate(s); s = ForkAsyncProducers(env).mutate(s); diff --git a/src/BoundSmallAllocations.cpp b/src/BoundSmallAllocations.cpp index b2ad80d78de5..6dff63c25d3a 100644 --- a/src/BoundSmallAllocations.cpp +++ b/src/BoundSmallAllocations.cpp @@ -8,6 +8,8 @@ namespace Halide { namespace Internal { +namespace { + // Find a constant upper bound on the size of each thread-local allocation class BoundSmallAllocations : public IRMutator { using IRMutator::visit; @@ -147,6 +149,8 @@ class BoundSmallAllocations : public IRMutator { } }; +} // namespace + Stmt bound_small_allocations(const Stmt &s) { return BoundSmallAllocations().mutate(s); } diff --git a/src/Bounds.cpp b/src/Bounds.cpp index ba1a51d3c23b..3749f5006c59 100644 --- a/src/Bounds.cpp +++ b/src/Bounds.cpp @@ -98,6 +98,8 @@ std::ostream &operator<<(std::ostream &stream, const Box &b) { return stream; } +namespace { + class Bounds : public IRVisitor { public: Interval interval; @@ -1577,6 +1579,8 @@ class Bounds : public IRVisitor { } }; +} // namespace + Interval bounds_of_expr_in_scope(const Expr &expr, const Scope &scope, const FuncValueBounds &fb, bool const_bound) { //debug(3) << "computing bounds_of_expr_in_scope " << expr << "\n"; Bounds b(&scope, fb, const_bound); @@ -1782,6 +1786,8 @@ bool box_contains(const Box &outer, const Box &inner) { return can_prove(condition); } +namespace { + class FindInnermostVar : public IRVisitor { public: const Scope &vars_depth; @@ -2630,6 +2636,8 @@ class BoxesTouched : public IRGraphVisitor { } }; +} // namespace + map boxes_touched(const Expr &e, Stmt s, bool consider_calls, bool consider_provides, const string &fn, const Scope &scope, const FuncValueBounds &fb) { if (!fn.empty() && s.defined()) { diff --git a/src/BoundsInference.cpp b/src/BoundsInference.cpp index 414c4e2ada9a..71ef6ef5221d 100644 --- a/src/BoundsInference.cpp +++ b/src/BoundsInference.cpp @@ -198,7 +198,6 @@ bool is_fused_with_others(const vector> &fused_groups, } return false; } -} // namespace class BoundsInference : public IRMutator { public: @@ -1255,6 +1254,8 @@ class BoundsInference : public IRMutator { } }; +} // namespace + Stmt bounds_inference(Stmt s, const vector &outputs, const vector &order, diff --git a/src/CodeGen_C.cpp b/src/CodeGen_C.cpp index 77a294abdb4f..809556873fa0 100644 --- a/src/CodeGen_C.cpp +++ b/src/CodeGen_C.cpp @@ -220,7 +220,6 @@ class HalideFreeHelper { }; } // namespace )INLINE_CODE"; -} // namespace class TypeInfoGatherer : public IRGraphVisitor { private: @@ -311,6 +310,8 @@ class TypeInfoGatherer : public IRGraphVisitor { std::set vector_types_used; }; +} // namespace + CodeGen_C::CodeGen_C(ostream &s, Target t, OutputKind output_kind, const std::string &guard) : IRPrinter(s), id("$$ BAD ID $$"), target(t), output_kind(output_kind), extern_c_open(false), inside_atomic_mutex_node(false), emit_atomic_stores(false), using_vector_typedefs(false) { diff --git a/src/CodeGen_D3D12Compute_Dev.cpp b/src/CodeGen_D3D12Compute_Dev.cpp index 10675ad546d1..3335bf0ca482 100644 --- a/src/CodeGen_D3D12Compute_Dev.cpp +++ b/src/CodeGen_D3D12Compute_Dev.cpp @@ -3,7 +3,9 @@ #include #include +#include "CodeGen_C.h" #include "CodeGen_D3D12Compute_Dev.h" +#include "CodeGen_GPU_Dev.h" #include "CodeGen_Internal.h" #include "Debug.h" #include "DeviceArgument.h" @@ -23,6 +25,92 @@ using std::vector; static ostringstream nil; +namespace { + +class CodeGen_D3D12Compute_Dev : public CodeGen_GPU_Dev { +public: + CodeGen_D3D12Compute_Dev(Target target); + + /** Compile a GPU kernel into the module. This may be called many times + * with different kernels, which will all be accumulated into a single + * source module shared by a given Halide pipeline. */ + void add_kernel(Stmt stmt, + const std::string &name, + const std::vector &args) override; + + /** (Re)initialize the GPU kernel module. This is separate from compile, + * since a GPU device module will often have many kernels compiled into it + * for a single pipeline. */ + void init_module() override; + + std::vector compile_to_src() override; + + std::string get_current_kernel_name() override; + + void dump() override; + + std::string print_gpu_name(const std::string &name) override; + + std::string api_unique_name() override { + return "d3d12compute"; + } + +protected: + friend struct StoragePackUnpack; + + class CodeGen_D3D12Compute_C : public CodeGen_C { + public: + CodeGen_D3D12Compute_C(std::ostream &s, Target t) + : CodeGen_C(s, t) { + integer_suffix_style = IntegerSuffixStyle::HLSL; + } + void add_kernel(Stmt stmt, + const std::string &name, + const std::vector &args); + + protected: + friend struct StoragePackUnpack; + + std::string print_type(Type type, AppendSpaceIfNeeded space_option = DoNotAppendSpace) override; + std::string print_storage_type(Type type); + std::string print_type_maybe_storage(Type type, bool storage, AppendSpaceIfNeeded space); + std::string print_reinterpret(Type type, const Expr &e) override; + std::string print_extern_call(const Call *op) override; + + std::string print_vanilla_cast(Type type, const std::string &value_expr); + std::string print_reinforced_cast(Type type, const std::string &value_expr); + std::string print_cast(Type target_type, Type source_type, const std::string &value_expr); + std::string print_reinterpret_cast(Type type, const std::string &value_expr); + + std::string print_assignment(Type t, const std::string &rhs) override; + + using CodeGen_C::visit; + void visit(const Evaluate *op) override; + void visit(const Min *) override; + void visit(const Max *) override; + void visit(const Div *) override; + void visit(const Mod *) override; + void visit(const For *) override; + void visit(const Ramp *op) override; + void visit(const Broadcast *op) override; + void visit(const Call *op) override; + void visit(const Load *op) override; + void visit(const Store *op) override; + void visit(const Select *op) override; + void visit(const Allocate *op) override; + void visit(const Free *op) override; + void visit(const Cast *op) override; + void visit(const Atomic *op) override; + void visit(const FloatImm *op) override; + + Scope<> groupshared_allocations; + }; + + std::ostringstream src_stream; + std::string cur_kernel_name; + CodeGen_D3D12Compute_C d3d12compute_c; +}; + CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_Dev(Target t) : d3d12compute_c(src_stream, t) { } @@ -1279,5 +1367,11 @@ std::string CodeGen_D3D12Compute_Dev::print_gpu_name(const std::string &name) { return name; } +} // namespace + +CodeGen_GPU_Dev *new_CodeGen_D3D12Compute_Dev(const Target &target) { + return new CodeGen_D3D12Compute_Dev(target); +} + } // namespace Internal } // namespace Halide diff --git a/src/CodeGen_D3D12Compute_Dev.h b/src/CodeGen_D3D12Compute_Dev.h index e08f9f5b6aaa..295efeff05e1 100644 --- a/src/CodeGen_D3D12Compute_Dev.h +++ b/src/CodeGen_D3D12Compute_Dev.h @@ -5,98 +5,15 @@ * Defines the code-generator for producing D3D12-compatible HLSL kernel code */ -#include - -#include "CodeGen_C.h" -#include "CodeGen_GPU_Dev.h" -#include "Target.h" - namespace Halide { -namespace Internal { - -class CodeGen_D3D12Compute_Dev : public CodeGen_GPU_Dev { -public: - CodeGen_D3D12Compute_Dev(Target target); - - /** Compile a GPU kernel into the module. This may be called many times - * with different kernels, which will all be accumulated into a single - * source module shared by a given Halide pipeline. */ - void add_kernel(Stmt stmt, - const std::string &name, - const std::vector &args) override; - - /** (Re)initialize the GPU kernel module. This is separate from compile, - * since a GPU device module will often have many kernels compiled into it - * for a single pipeline. */ - void init_module() override; - - std::vector compile_to_src() override; - - std::string get_current_kernel_name() override; - - void dump() override; - std::string print_gpu_name(const std::string &name) override; +struct Target; - std::string api_unique_name() override { - return "d3d12compute"; - } - -protected: - friend struct StoragePackUnpack; - - class CodeGen_D3D12Compute_C : public CodeGen_C { - public: - CodeGen_D3D12Compute_C(std::ostream &s, Target t) - : CodeGen_C(s, t) { - integer_suffix_style = IntegerSuffixStyle::HLSL; - } - void add_kernel(Stmt stmt, - const std::string &name, - const std::vector &args); - - protected: - friend struct StoragePackUnpack; - - std::string print_type(Type type, AppendSpaceIfNeeded space_option = DoNotAppendSpace) override; - std::string print_storage_type(Type type); - std::string print_type_maybe_storage(Type type, bool storage, AppendSpaceIfNeeded space); - std::string print_reinterpret(Type type, const Expr &e) override; - std::string print_extern_call(const Call *op) override; - - std::string print_vanilla_cast(Type type, const std::string &value_expr); - std::string print_reinforced_cast(Type type, const std::string &value_expr); - std::string print_cast(Type target_type, Type source_type, const std::string &value_expr); - std::string print_reinterpret_cast(Type type, const std::string &value_expr); - - std::string print_assignment(Type t, const std::string &rhs) override; - - using CodeGen_C::visit; - void visit(const Evaluate *op) override; - void visit(const Min *) override; - void visit(const Max *) override; - void visit(const Div *) override; - void visit(const Mod *) override; - void visit(const For *) override; - void visit(const Ramp *op) override; - void visit(const Broadcast *op) override; - void visit(const Call *op) override; - void visit(const Load *op) override; - void visit(const Store *op) override; - void visit(const Select *op) override; - void visit(const Allocate *op) override; - void visit(const Free *op) override; - void visit(const Cast *op) override; - void visit(const Atomic *op) override; - void visit(const FloatImm *op) override; +namespace Internal { - Scope<> groupshared_allocations; - }; +struct CodeGen_GPU_Dev; - std::ostringstream src_stream; - std::string cur_kernel_name; - CodeGen_D3D12Compute_C d3d12compute_c; -}; +CodeGen_GPU_Dev *new_CodeGen_D3D12Compute_Dev(const Target &target); } // namespace Internal } // namespace Halide diff --git a/src/CodeGen_GPU_Host.cpp b/src/CodeGen_GPU_Host.cpp index 4cb09da4712d..8c5a1e061197 100644 --- a/src/CodeGen_GPU_Host.cpp +++ b/src/CodeGen_GPU_Host.cpp @@ -108,23 +108,23 @@ CodeGen_GPU_Host::CodeGen_GPU_Host(Target target) } if (target.has_feature(Target::OpenGLCompute)) { debug(1) << "Constructing OpenGL Compute device codegen\n"; - cgdev[DeviceAPI::OpenGLCompute] = new CodeGen_OpenGLCompute_Dev(target); + cgdev[DeviceAPI::OpenGLCompute] = new_CodeGen_OpenGLCompute_Dev(target); } if (target.has_feature(Target::CUDA)) { debug(1) << "Constructing CUDA device codegen\n"; - cgdev[DeviceAPI::CUDA] = new CodeGen_PTX_Dev(target); + cgdev[DeviceAPI::CUDA] = new_CodeGen_PTX_Dev(target); } if (target.has_feature(Target::OpenCL)) { debug(1) << "Constructing OpenCL device codegen\n"; - cgdev[DeviceAPI::OpenCL] = new CodeGen_OpenCL_Dev(target); + cgdev[DeviceAPI::OpenCL] = new_CodeGen_OpenCL_Dev(target); } if (target.has_feature(Target::Metal)) { debug(1) << "Constructing Metal device codegen\n"; - cgdev[DeviceAPI::Metal] = new CodeGen_Metal_Dev(target); + cgdev[DeviceAPI::Metal] = new_CodeGen_Metal_Dev(target); } if (target.has_feature(Target::D3D12Compute)) { debug(1) << "Constructing Direct3D 12 Compute device codegen\n"; - cgdev[DeviceAPI::D3D12Compute] = new CodeGen_D3D12Compute_Dev(target); + cgdev[DeviceAPI::D3D12Compute] = new_CodeGen_D3D12Compute_Dev(target); } if (cgdev.empty()) { diff --git a/src/CodeGen_Hexagon.cpp b/src/CodeGen_Hexagon.cpp index 55783989d897..1103c18d973b 100644 --- a/src/CodeGen_Hexagon.cpp +++ b/src/CodeGen_Hexagon.cpp @@ -62,6 +62,7 @@ Stmt call_halide_qurt_hvx_lock(const Target &target) { AssertStmt::make(EQ::make(hvx_lock_result_var, 0), hvx_lock_result_var)); return check_hvx_lock; } + Stmt call_halide_qurt_hvx_unlock() { Expr hvx_unlock = Call::make(Int(32), "halide_qurt_hvx_unlock", {}, Call::Extern); @@ -73,6 +74,7 @@ Stmt call_halide_qurt_hvx_unlock() { hvx_unlock_result_var)); return check_hvx_unlock; } + // Wrap the stmt in a call to qurt_hvx_lock, calling qurt_hvx_unlock // as a destructor if successful. Stmt acquire_hvx_context(Stmt stmt, const Target &target) { @@ -89,6 +91,7 @@ Stmt acquire_hvx_context(Stmt stmt, const Target &target) { stmt = Block::make(check_hvx_lock, stmt); return stmt; } + bool is_dense_ramp(const Expr &x) { const Ramp *r = x.as(); if (!r) { diff --git a/src/CodeGen_Metal_Dev.cpp b/src/CodeGen_Metal_Dev.cpp index 87195e72bb69..1b465791f9a8 100644 --- a/src/CodeGen_Metal_Dev.cpp +++ b/src/CodeGen_Metal_Dev.cpp @@ -2,6 +2,8 @@ #include #include +#include "CodeGen_C.h" +#include "CodeGen_GPU_Dev.h" #include "CodeGen_Internal.h" #include "CodeGen_Metal_Dev.h" #include "Debug.h" @@ -17,6 +19,87 @@ using std::vector; static ostringstream nil; +namespace { + +class CodeGen_Metal_Dev : public CodeGen_GPU_Dev { +public: + CodeGen_Metal_Dev(Target target); + + /** Compile a GPU kernel into the module. This may be called many times + * with different kernels, which will all be accumulated into a single + * source module shared by a given Halide pipeline. */ + void add_kernel(Stmt stmt, + const std::string &name, + const std::vector &args) override; + + /** (Re)initialize the GPU kernel module. This is separate from compile, + * since a GPU device module will often have many kernels compiled into it + * for a single pipeline. */ + void init_module() override; + + std::vector compile_to_src() override; + + std::string get_current_kernel_name() override; + + void dump() override; + + std::string print_gpu_name(const std::string &name) override; + + std::string api_unique_name() override { + return "metal"; + } + +protected: + class CodeGen_Metal_C : public CodeGen_C { + public: + CodeGen_Metal_C(std::ostream &s, Target t) + : CodeGen_C(s, t) { + } + void add_kernel(const Stmt &stmt, + const std::string &name, + const std::vector &args); + + protected: + using CodeGen_C::visit; + std::string print_type(Type type, AppendSpaceIfNeeded space_option = DoNotAppendSpace) override; + // Vectors in Metal come in two varieties, regular and packed. + // For storage allocations and pointers used in address arithmetic, + // packed types must be used. For temporaries, constructors, etc. + // regular types must be used. + // This concept also potentially applies to half types, which are + // often only supported for storage, not arithmetic, + // hence the method name. + std::string print_storage_type(Type type); + std::string print_type_maybe_storage(Type type, bool storage, AppendSpaceIfNeeded space); + std::string print_reinterpret(Type type, const Expr &e) override; + std::string print_extern_call(const Call *op) override; + + std::string get_memory_space(const std::string &); + + std::string shared_name; + + void visit(const Min *) override; + void visit(const Max *) override; + void visit(const Div *) override; + void visit(const Mod *) override; + void visit(const For *) override; + void visit(const Ramp *op) override; + void visit(const Broadcast *op) override; + void visit(const Call *op) override; + void visit(const Load *op) override; + void visit(const Store *op) override; + void visit(const Select *op) override; + void visit(const Allocate *op) override; + void visit(const Free *op) override; + void visit(const Cast *op) override; + void visit(const Atomic *op) override; + }; + + std::ostringstream src_stream; + std::string cur_kernel_name; + CodeGen_Metal_C metal_c; +}; + CodeGen_Metal_Dev::CodeGen_Metal_Dev(Target t) : metal_c(src_stream, t) { } @@ -726,5 +809,11 @@ std::string CodeGen_Metal_Dev::print_gpu_name(const std::string &name) { return name; } +} // namespace + +CodeGen_GPU_Dev *new_CodeGen_Metal_Dev(const Target &target) { + return new CodeGen_Metal_Dev(target); +} + } // namespace Internal } // namespace Halide diff --git a/src/CodeGen_Metal_Dev.h b/src/CodeGen_Metal_Dev.h index a8dcce1ed30f..02d09cb80e05 100644 --- a/src/CodeGen_Metal_Dev.h +++ b/src/CodeGen_Metal_Dev.h @@ -5,93 +5,15 @@ * Defines the code-generator for producing Apple Metal shading language kernel code */ -#include - -#include "CodeGen_C.h" -#include "CodeGen_GPU_Dev.h" -#include "Target.h" - namespace Halide { -namespace Internal { - -class CodeGen_Metal_Dev : public CodeGen_GPU_Dev { -public: - CodeGen_Metal_Dev(Target target); - - /** Compile a GPU kernel into the module. This may be called many times - * with different kernels, which will all be accumulated into a single - * source module shared by a given Halide pipeline. */ - void add_kernel(Stmt stmt, - const std::string &name, - const std::vector &args) override; - - /** (Re)initialize the GPU kernel module. This is separate from compile, - * since a GPU device module will often have many kernels compiled into it - * for a single pipeline. */ - void init_module() override; - - std::vector compile_to_src() override; - std::string get_current_kernel_name() override; +struct Target; - void dump() override; - - std::string print_gpu_name(const std::string &name) override; - - std::string api_unique_name() override { - return "metal"; - } - -protected: - class CodeGen_Metal_C : public CodeGen_C { - public: - CodeGen_Metal_C(std::ostream &s, Target t) - : CodeGen_C(s, t) { - } - void add_kernel(const Stmt &stmt, - const std::string &name, - const std::vector &args); - - protected: - using CodeGen_C::visit; - std::string print_type(Type type, AppendSpaceIfNeeded space_option = DoNotAppendSpace) override; - // Vectors in Metal come in two varieties, regular and packed. - // For storage allocations and pointers used in address arithmetic, - // packed types must be used. For temporaries, constructors, etc. - // regular types must be used. - // This concept also potentially applies to half types, which are - // often only supported for storage, not arithmetic, - // hence the method name. - std::string print_storage_type(Type type); - std::string print_type_maybe_storage(Type type, bool storage, AppendSpaceIfNeeded space); - std::string print_reinterpret(Type type, const Expr &e) override; - std::string print_extern_call(const Call *op) override; - - std::string get_memory_space(const std::string &); - - std::string shared_name; +namespace Internal { - void visit(const Min *) override; - void visit(const Max *) override; - void visit(const Div *) override; - void visit(const Mod *) override; - void visit(const For *) override; - void visit(const Ramp *op) override; - void visit(const Broadcast *op) override; - void visit(const Call *op) override; - void visit(const Load *op) override; - void visit(const Store *op) override; - void visit(const Select *op) override; - void visit(const Allocate *op) override; - void visit(const Free *op) override; - void visit(const Cast *op) override; - void visit(const Atomic *op) override; - }; +struct CodeGen_GPU_Dev; - std::ostringstream src_stream; - std::string cur_kernel_name; - CodeGen_Metal_C metal_c; -}; +CodeGen_GPU_Dev *new_CodeGen_Metal_Dev(const Target &target); } // namespace Internal } // namespace Halide diff --git a/src/CodeGen_OpenCL_Dev.cpp b/src/CodeGen_OpenCL_Dev.cpp index d6e26894a14d..856eae556be5 100644 --- a/src/CodeGen_OpenCL_Dev.cpp +++ b/src/CodeGen_OpenCL_Dev.cpp @@ -4,6 +4,8 @@ #include #include "CSE.h" +#include "CodeGen_C.h" +#include "CodeGen_GPU_Dev.h" #include "CodeGen_Internal.h" #include "CodeGen_OpenCL_Dev.h" #include "Debug.h" @@ -22,6 +24,89 @@ using std::sort; using std::string; using std::vector; +namespace { + +class CodeGen_OpenCL_Dev : public CodeGen_GPU_Dev { +public: + CodeGen_OpenCL_Dev(Target target); + + /** Compile a GPU kernel into the module. This may be called many times + * with different kernels, which will all be accumulated into a single + * source module shared by a given Halide pipeline. */ + void add_kernel(Stmt stmt, + const std::string &name, + const std::vector &args) override; + + /** (Re)initialize the GPU kernel module. This is separate from compile, + * since a GPU device module will often have many kernels compiled into it + * for a single pipeline. */ + void init_module() override; + + std::vector compile_to_src() override; + + std::string get_current_kernel_name() override; + + void dump() override; + + std::string print_gpu_name(const std::string &name) override; + + std::string api_unique_name() override { + return "opencl"; + } + +protected: + class CodeGen_OpenCL_C : public CodeGen_C { + public: + CodeGen_OpenCL_C(std::ostream &s, Target t) + : CodeGen_C(s, t) { + integer_suffix_style = IntegerSuffixStyle::OpenCL; + } + void add_kernel(Stmt stmt, + const std::string &name, + const std::vector &args); + + protected: + using CodeGen_C::visit; + std::string print_type(Type type, AppendSpaceIfNeeded append_space = DoNotAppendSpace) override; + std::string print_reinterpret(Type type, const Expr &e) override; + std::string print_extern_call(const Call *op) override; + std::string print_array_access(const std::string &name, + const Type &type, + const std::string &id_index); + void add_vector_typedefs(const std::set &vector_types) override; + + std::string get_memory_space(const std::string &); + + std::string shared_name; + + void visit(const For *) override; + void visit(const Ramp *op) override; + void visit(const Broadcast *op) override; + void visit(const Call *op) override; + void visit(const Load *op) override; + void visit(const Store *op) override; + void visit(const Cast *op) override; + void visit(const Select *op) override; + void visit(const EQ *) override; + void visit(const NE *) override; + void visit(const LT *) override; + void visit(const LE *) override; + void visit(const GT *) override; + void visit(const GE *) override; + void visit(const Allocate *op) override; + void visit(const Free *op) override; + void visit(const AssertStmt *op) override; + void visit(const Shuffle *op) override; + void visit(const Min *op) override; + void visit(const Max *op) override; + void visit(const Atomic *op) override; + }; + + std::ostringstream src_stream; + std::string cur_kernel_name; + CodeGen_OpenCL_C clc; +}; + CodeGen_OpenCL_Dev::CodeGen_OpenCL_Dev(Target t) : clc(src_stream, t) { } @@ -1133,5 +1218,11 @@ std::string CodeGen_OpenCL_Dev::print_gpu_name(const std::string &name) { return name; } +} // namespace + +CodeGen_GPU_Dev *new_CodeGen_OpenCL_Dev(const Target &target) { + return new CodeGen_OpenCL_Dev(target); +} + } // namespace Internal } // namespace Halide diff --git a/src/CodeGen_OpenCL_Dev.h b/src/CodeGen_OpenCL_Dev.h index dd20efec1e88..5743e20343ed 100644 --- a/src/CodeGen_OpenCL_Dev.h +++ b/src/CodeGen_OpenCL_Dev.h @@ -5,95 +5,15 @@ * Defines the code-generator for producing OpenCL C kernel code */ -#include - -#include "CodeGen_C.h" -#include "CodeGen_GPU_Dev.h" -#include "Target.h" - namespace Halide { -namespace Internal { - -class CodeGen_OpenCL_Dev : public CodeGen_GPU_Dev { -public: - CodeGen_OpenCL_Dev(Target target); - - /** Compile a GPU kernel into the module. This may be called many times - * with different kernels, which will all be accumulated into a single - * source module shared by a given Halide pipeline. */ - void add_kernel(Stmt stmt, - const std::string &name, - const std::vector &args) override; - - /** (Re)initialize the GPU kernel module. This is separate from compile, - * since a GPU device module will often have many kernels compiled into it - * for a single pipeline. */ - void init_module() override; - - std::vector compile_to_src() override; - std::string get_current_kernel_name() override; +struct Target; - void dump() override; - - std::string print_gpu_name(const std::string &name) override; - - std::string api_unique_name() override { - return "opencl"; - } - -protected: - class CodeGen_OpenCL_C : public CodeGen_C { - public: - CodeGen_OpenCL_C(std::ostream &s, Target t) - : CodeGen_C(s, t) { - integer_suffix_style = IntegerSuffixStyle::OpenCL; - } - void add_kernel(Stmt stmt, - const std::string &name, - const std::vector &args); - - protected: - using CodeGen_C::visit; - std::string print_type(Type type, AppendSpaceIfNeeded append_space = DoNotAppendSpace) override; - std::string print_reinterpret(Type type, const Expr &e) override; - std::string print_extern_call(const Call *op) override; - std::string print_array_access(const std::string &name, - const Type &type, - const std::string &id_index); - void add_vector_typedefs(const std::set &vector_types) override; - - std::string get_memory_space(const std::string &); - - std::string shared_name; +namespace Internal { - void visit(const For *) override; - void visit(const Ramp *op) override; - void visit(const Broadcast *op) override; - void visit(const Call *op) override; - void visit(const Load *op) override; - void visit(const Store *op) override; - void visit(const Cast *op) override; - void visit(const Select *op) override; - void visit(const EQ *) override; - void visit(const NE *) override; - void visit(const LT *) override; - void visit(const LE *) override; - void visit(const GT *) override; - void visit(const GE *) override; - void visit(const Allocate *op) override; - void visit(const Free *op) override; - void visit(const AssertStmt *op) override; - void visit(const Shuffle *op) override; - void visit(const Min *op) override; - void visit(const Max *op) override; - void visit(const Atomic *op) override; - }; +struct CodeGen_GPU_Dev; - std::ostringstream src_stream; - std::string cur_kernel_name; - CodeGen_OpenCL_C clc; -}; +CodeGen_GPU_Dev *new_CodeGen_OpenCL_Dev(const Target &target); } // namespace Internal } // namespace Halide diff --git a/src/CodeGen_OpenGLCompute_Dev.cpp b/src/CodeGen_OpenGLCompute_Dev.cpp index 8bafed22cbd3..7e5033668f88 100644 --- a/src/CodeGen_OpenGLCompute_Dev.cpp +++ b/src/CodeGen_OpenGLCompute_Dev.cpp @@ -1,3 +1,5 @@ +#include "CodeGen_GPU_Dev.h" +#include "CodeGen_OpenGL_Dev.h" #include "CodeGen_OpenGLCompute_Dev.h" #include "Debug.h" #include "Deinterleave.h" @@ -17,6 +19,67 @@ using std::ostringstream; using std::string; using std::vector; +namespace { + +class CodeGen_OpenGLCompute_Dev : public CodeGen_GPU_Dev { +public: + CodeGen_OpenGLCompute_Dev(Target target); + + // CodeGen_GPU_Dev interface + void add_kernel(Stmt stmt, + const std::string &name, + const std::vector &args) override; + + void init_module() override; + + std::vector compile_to_src() override; + + std::string get_current_kernel_name() override; + + void dump() override; + + std::string print_gpu_name(const std::string &name) override; + + std::string api_unique_name() override { + return "openglcompute"; + } + bool kernel_run_takes_types() const override { + return true; + } + +protected: + class CodeGen_OpenGLCompute_C : public CodeGen_GLSLBase { + public: + CodeGen_OpenGLCompute_C(std::ostream &s, Target t); + void add_kernel(const Stmt &stmt, + const std::string &name, + const std::vector &args); + + protected: + std::string print_type(Type type, AppendSpaceIfNeeded space_option = DoNotAppendSpace) override; + + using CodeGen_GLSLBase::visit; + void visit(const For *) override; + void visit(const Ramp *op) override; + void visit(const Broadcast *op) override; + void visit(const Load *op) override; + void visit(const Store *op) override; + void visit(const Call *op) override; + void visit(const Allocate *op) override; + void visit(const Free *op) override; + void visit(const Select *op) override; + void visit(const Evaluate *op) override; + void visit(const IntImm *op) override; + + public: + int workgroup_size[3]; + }; + + std::ostringstream src_stream; + std::string cur_kernel_name; + CodeGen_OpenGLCompute_C glc; +}; + CodeGen_OpenGLCompute_Dev::CodeGen_OpenGLCompute_Dev(Target target) : glc(src_stream, target) { } @@ -392,5 +455,11 @@ std::string CodeGen_OpenGLCompute_Dev::print_gpu_name(const std::string &name) { return name; } +} // namespace + +CodeGen_GPU_Dev *new_CodeGen_OpenGLCompute_Dev(const Target &target) { + return new CodeGen_OpenGLCompute_Dev(target); +} + } // namespace Internal } // namespace Halide diff --git a/src/CodeGen_OpenGLCompute_Dev.h b/src/CodeGen_OpenGLCompute_Dev.h index 141e9e1ae1e9..e91e3a4134b1 100644 --- a/src/CodeGen_OpenGLCompute_Dev.h +++ b/src/CodeGen_OpenGLCompute_Dev.h @@ -5,75 +5,15 @@ * Defines the code-generator for producing GLSL kernel code for OpenGL Compute. */ -#include -#include - -#include "CodeGen_C.h" -#include "CodeGen_GPU_Dev.h" -#include "CodeGen_OpenGL_Dev.h" -#include "Target.h" - namespace Halide { -namespace Internal { - -class CodeGen_OpenGLCompute_Dev : public CodeGen_GPU_Dev { -public: - CodeGen_OpenGLCompute_Dev(Target target); - - // CodeGen_GPU_Dev interface - void add_kernel(Stmt stmt, - const std::string &name, - const std::vector &args) override; - - void init_module() override; - - std::vector compile_to_src() override; - std::string get_current_kernel_name() override; +struct Target; - void dump() override; - - std::string print_gpu_name(const std::string &name) override; - - std::string api_unique_name() override { - return "openglcompute"; - } - bool kernel_run_takes_types() const override { - return true; - } - -protected: - class CodeGen_OpenGLCompute_C : public CodeGen_GLSLBase { - public: - CodeGen_OpenGLCompute_C(std::ostream &s, Target t); - void add_kernel(const Stmt &stmt, - const std::string &name, - const std::vector &args); - - protected: - std::string print_type(Type type, AppendSpaceIfNeeded space_option = DoNotAppendSpace) override; - - using CodeGen_GLSLBase::visit; - void visit(const For *) override; - void visit(const Ramp *op) override; - void visit(const Broadcast *op) override; - void visit(const Load *op) override; - void visit(const Store *op) override; - void visit(const Call *op) override; - void visit(const Allocate *op) override; - void visit(const Free *op) override; - void visit(const Select *op) override; - void visit(const Evaluate *op) override; - void visit(const IntImm *op) override; +namespace Internal { - public: - int workgroup_size[3]; - }; +struct CodeGen_GPU_Dev; - std::ostringstream src_stream; - std::string cur_kernel_name; - CodeGen_OpenGLCompute_C glc; -}; +CodeGen_GPU_Dev *new_CodeGen_OpenGLCompute_Dev(const Target &target); } // namespace Internal } // namespace Halide diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index 7c3dd2405b25..d0032581008b 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -1,3 +1,5 @@ +#include "CodeGen_GPU_Dev.h" +#include "CodeGen_LLVM.h" #include "CodeGen_PTX_Dev.h" #include "CSE.h" #include "CodeGen_Internal.h" @@ -32,6 +34,78 @@ using std::vector; using namespace llvm; +namespace { + +/** A code generator that emits GPU code from a given Halide stmt. */ +class CodeGen_PTX_Dev : public CodeGen_LLVM, public CodeGen_GPU_Dev { +public: + /** Create a PTX device code generator. */ + CodeGen_PTX_Dev(Target host); + ~CodeGen_PTX_Dev() override; + + void add_kernel(Stmt stmt, + const std::string &name, + const std::vector &args) override; + + static void test(); + + std::vector compile_to_src() override; + std::string get_current_kernel_name() override; + + void dump() override; + + std::string print_gpu_name(const std::string &name) override; + + std::string api_unique_name() override { + return "cuda"; + } + +protected: + using CodeGen_LLVM::visit; + + /** (Re)initialize the PTX module. This is separate from compile, since + * a PTX device module will often have many kernels compiled into it for + * a single pipeline. */ + /* override */ void init_module() override; + + /** We hold onto the basic block at the start of the device + * function in order to inject allocas */ + llvm::BasicBlock *entry_block; + + /** Nodes for which we need to override default behavior for the GPU runtime */ + // @{ + void visit(const Call *) override; + void visit(const For *) override; + void visit(const Allocate *) override; + void visit(const Free *) override; + void visit(const AssertStmt *) override; + void visit(const Load *) override; + void visit(const Store *) override; + void visit(const Atomic *) override; + void codegen_vector_reduce(const VectorReduce *op, const Expr &init) override; + // @} + + std::string march() const; + std::string mcpu() const override; + std::string mattrs() const override; + bool use_soft_float_abi() const override; + int native_vector_bits() const override; + bool promote_indices() const override { + return false; + } + + Type upgrade_type_for_arithmetic(const Type &t) const override { + return t; + } + Type upgrade_type_for_storage(const Type &t) const override; + + /** Map from simt variable names (e.g. foo.__block_id_x) to the llvm + * ptx intrinsic functions to call to get them. */ + std::string simt_intrinsic(const std::string &name); + + bool supports_atomic_add(const Type &t) const override; +}; + CodeGen_PTX_Dev::CodeGen_PTX_Dev(Target host) : CodeGen_LLVM(host) { #if !defined(WITH_NVPTX) @@ -723,5 +797,11 @@ bool CodeGen_PTX_Dev::supports_atomic_add(const Type &t) const { return false; } +} // namespace + +CodeGen_GPU_Dev *new_CodeGen_PTX_Dev(const Target &target) { + return new CodeGen_PTX_Dev(target); +} + } // namespace Internal } // namespace Halide diff --git a/src/CodeGen_PTX_Dev.h b/src/CodeGen_PTX_Dev.h index 7f7c80669f47..eabf4d9cc2c4 100644 --- a/src/CodeGen_PTX_Dev.h +++ b/src/CodeGen_PTX_Dev.h @@ -5,89 +5,15 @@ * Defines the code-generator for producing CUDA host code */ -#include "CodeGen_GPU_Dev.h" -#include "CodeGen_GPU_Host.h" -#include "CodeGen_LLVM.h" - -namespace llvm { -class BasicBlock; -} - namespace Halide { -namespace Internal { - -/** A code generator that emits GPU code from a given Halide stmt. */ -class CodeGen_PTX_Dev : public CodeGen_LLVM, public CodeGen_GPU_Dev { -public: - friend class CodeGen_GPU_Host; - friend class CodeGen_GPU_Host; - - /** Create a PTX device code generator. */ - CodeGen_PTX_Dev(Target host); - ~CodeGen_PTX_Dev() override; - - void add_kernel(Stmt stmt, - const std::string &name, - const std::vector &args) override; - - static void test(); - - std::vector compile_to_src() override; - std::string get_current_kernel_name() override; - void dump() override; +struct Target; - std::string print_gpu_name(const std::string &name) override; - - std::string api_unique_name() override { - return "cuda"; - } - -protected: - using CodeGen_LLVM::visit; - - /** (Re)initialize the PTX module. This is separate from compile, since - * a PTX device module will often have many kernels compiled into it for - * a single pipeline. */ - /* override */ void init_module() override; - - /** We hold onto the basic block at the start of the device - * function in order to inject allocas */ - llvm::BasicBlock *entry_block; - - /** Nodes for which we need to override default behavior for the GPU runtime */ - // @{ - void visit(const Call *) override; - void visit(const For *) override; - void visit(const Allocate *) override; - void visit(const Free *) override; - void visit(const AssertStmt *) override; - void visit(const Load *) override; - void visit(const Store *) override; - void visit(const Atomic *) override; - void codegen_vector_reduce(const VectorReduce *op, const Expr &init) override; - // @} - - std::string march() const; - std::string mcpu() const override; - std::string mattrs() const override; - bool use_soft_float_abi() const override; - int native_vector_bits() const override; - bool promote_indices() const override { - return false; - } - - Type upgrade_type_for_arithmetic(const Type &t) const override { - return t; - } - Type upgrade_type_for_storage(const Type &t) const override; +namespace Internal { - /** Map from simt variable names (e.g. foo.__block_id_x) to the llvm - * ptx intrinsic functions to call to get them. */ - std::string simt_intrinsic(const std::string &name); +struct CodeGen_GPU_Dev; - bool supports_atomic_add(const Type &t) const override; -}; +CodeGen_GPU_Dev *new_CodeGen_PTX_Dev(const Target &target); } // namespace Internal } // namespace Halide diff --git a/src/DebugToFile.cpp b/src/DebugToFile.cpp index 1a8ff962a4fb..bbc04ea3b11b 100644 --- a/src/DebugToFile.cpp +++ b/src/DebugToFile.cpp @@ -15,6 +15,8 @@ using std::ostringstream; using std::string; using std::vector; +namespace { + class DebugToFile : public IRMutator { const map &env; @@ -146,6 +148,8 @@ class AddDummyRealizations : public IRMutator { } }; +} // namespace + Stmt debug_to_file(Stmt s, const vector &outputs, const map &env) { // Temporarily wrap the produce nodes for the output functions in // realize nodes so that we know when to write the debug outputs. diff --git a/src/DerivativeUtils.cpp b/src/DerivativeUtils.cpp index b5113e6062d0..c9028679a1af 100644 --- a/src/DerivativeUtils.cpp +++ b/src/DerivativeUtils.cpp @@ -28,6 +28,8 @@ using std::set; using std::string; using std::vector; +namespace { + class StripLets : public IRGraphMutator { public: using IRGraphMutator::visit; @@ -36,6 +38,8 @@ class StripLets : public IRGraphMutator { } }; +} // namespace + vector gather_variables(const Expr &expr, const vector &filter) { @@ -130,6 +134,8 @@ Expr add_let_expression(const Expr &expr, return ret; } +namespace { + /** Gather the expression DAG and sort them in topological order */ class ExpressionSorter : public IRGraphVisitor { @@ -198,6 +204,8 @@ void ExpressionSorter::include(const Expr &e) { } } +} // namespace + vector sort_expressions(const Expr &expr) { ExpressionSorter sorter; return sorter.sort(expr); @@ -330,6 +338,8 @@ vector vars_to_strings(const vector &vars) { return ret; } +namespace { + class RDomExtractor : public IRGraphVisitor { public: using IRGraphVisitor::visit; @@ -348,6 +358,8 @@ class RDomExtractor : public IRGraphVisitor { ReductionDomain rdom; }; +} // namespace + ReductionDomain extract_rdom(const Expr &expr) { RDomExtractor extractor; return extractor.gather(expr); @@ -383,6 +395,8 @@ pair solve_inverse(Expr expr, return {true, rmin + r.x}; } +namespace { + struct BufferDimensionsFinder : public IRGraphVisitor { public: using IRGraphVisitor::visit; @@ -429,11 +443,15 @@ struct BufferDimensionsFinder : public IRGraphVisitor { map buffer_calls; }; +} // namespace + map find_buffer_param_calls(const Func &func) { BufferDimensionsFinder finder; return finder.find(func); } +namespace { + struct ImplicitVariablesFinder : public IRGraphVisitor { public: using IRGraphVisitor::visit; @@ -453,6 +471,8 @@ struct ImplicitVariablesFinder : public IRGraphVisitor { set implicit_variables; }; +} // namespace + set find_implicit_variables(const Expr &expr) { ImplicitVariablesFinder finder; return finder.find(expr); @@ -477,6 +497,8 @@ Expr substitute_rdom_predicate( return substituted; } +namespace { + struct FunctionCallFinder : public IRGraphVisitor { public: using IRGraphVisitor::visit; @@ -524,6 +546,8 @@ struct FunctionCallFinder : public IRGraphVisitor { bool found; }; +} // namespace + bool is_calling_function( const string &func_name, const Expr &expr, const map &let_var_mapping) { @@ -538,6 +562,8 @@ bool is_calling_function( return finder.find(expr, let_var_mapping); } +namespace { + struct SubstituteCallArgWithPureArg : public IRMutator { public: SubstituteCallArgWithPureArg(Func f, int variable_id) @@ -560,6 +586,8 @@ struct SubstituteCallArgWithPureArg : public IRMutator { int variable_id; }; +} // namespace + Expr substitute_call_arg_with_pure_arg(Func f, int variable_id, const Expr &e) { return simplify(SubstituteCallArgWithPureArg(std::move(f), variable_id).mutate(e)); } diff --git a/src/EliminateBoolVectors.cpp b/src/EliminateBoolVectors.cpp index 16e6814e3423..f2f042f348da 100644 --- a/src/EliminateBoolVectors.cpp +++ b/src/EliminateBoolVectors.cpp @@ -6,6 +6,8 @@ namespace Halide { namespace Internal { +namespace { + class EliminateBoolVectors : public IRMutator { private: using IRMutator::visit; @@ -314,6 +316,8 @@ class EliminateBoolVectors : public IRMutator { } }; +} // namespace + Stmt eliminate_bool_vectors(const Stmt &s) { return EliminateBoolVectors().mutate(s); } diff --git a/src/FastIntegerDivide.cpp b/src/FastIntegerDivide.cpp index 09f581d19c8c..e5fbdf9bd2c6 100644 --- a/src/FastIntegerDivide.cpp +++ b/src/FastIntegerDivide.cpp @@ -8,7 +8,7 @@ namespace Halide { using namespace Halide::Internal::IntegerDivision; -namespace IntegerDivideTable { +namespace { Buffer integer_divide_table_u8() { static std::mutex initialize_lock; @@ -111,7 +111,7 @@ Buffer integer_divide_table_s32() { return im; } } -} // namespace IntegerDivideTable +} // namespace Expr fast_integer_divide(Expr numerator, Expr denominator) { if (is_const(denominator)) { @@ -133,20 +133,20 @@ Expr fast_integer_divide(Expr numerator, Expr denominator) { Expr mul, shift; switch (t.bits()) { case 8: { - Buffer table = IntegerDivideTable::integer_divide_table_u8(); + Buffer table = integer_divide_table_u8(); mul = table(denominator, 0); shift = table(denominator, 1); break; } case 16: { - Buffer table = IntegerDivideTable::integer_divide_table_u16(); + Buffer table = integer_divide_table_u16(); mul = table(denominator, 0); shift = table(denominator, 1); break; } default: // 32 { - Buffer table = IntegerDivideTable::integer_divide_table_u32(); + Buffer table = integer_divide_table_u32(); mul = table(denominator, 0); shift = table(denominator, 1); break; @@ -175,20 +175,20 @@ Expr fast_integer_divide(Expr numerator, Expr denominator) { Expr mul, shift; switch (t.bits()) { case 8: { - Buffer table = IntegerDivideTable::integer_divide_table_s8(); + Buffer table = integer_divide_table_s8(); mul = table(denominator, 0); shift = table(denominator, 1); break; } case 16: { - Buffer table = IntegerDivideTable::integer_divide_table_s16(); + Buffer table = integer_divide_table_s16(); mul = table(denominator, 0); shift = table(denominator, 1); break; } default: // 32 { - Buffer table = IntegerDivideTable::integer_divide_table_s32(); + Buffer table = integer_divide_table_s32(); mul = table(denominator, 0); shift = table(denominator, 1); break; diff --git a/src/FastIntegerDivide.h b/src/FastIntegerDivide.h index 9ed10c0105ad..7a802ababa75 100644 --- a/src/FastIntegerDivide.h +++ b/src/FastIntegerDivide.h @@ -6,19 +6,6 @@ namespace Halide { -/** Built-in images used for fast_integer_divide below. Use of - * fast_integer_divide will automatically embed the appropriate tables - * in your object file. They are declared here in case you want to do - * something non-default with them. */ -namespace IntegerDivideTable { -Buffer integer_divide_table_u8(); -Buffer integer_divide_table_s8(); -Buffer integer_divide_table_u16(); -Buffer integer_divide_table_s16(); -Buffer integer_divide_table_u32(); -Buffer integer_divide_table_s32(); -} // namespace IntegerDivideTable - /** Integer division by small values can be done exactly as multiplies * and shifts. This function does integer division for numerators of * various integer types (8, 16, 32 bit signed and unsigned) diff --git a/src/Function.cpp b/src/Function.cpp index 4b6a437a181d..74ab2723a55e 100644 --- a/src/Function.cpp +++ b/src/Function.cpp @@ -175,6 +175,8 @@ void destroy(const FunctionGroup *f) { delete f; } +namespace { + // All variables present in any part of a function definition must // either be pure args, elements of the reduction domain, parameters // (i.e. attached to some Parameter object), or part of a let node @@ -280,9 +282,9 @@ class FreezeFunctions : public IRGraphVisitor { }; // A counter to use in tagging random variables -namespace { std::atomic rand_counter{0}; -} + +} // namespace Function::Function(const FunctionPtr &ptr) : contents(ptr) { diff --git a/src/FuseGPUThreadLoops.cpp b/src/FuseGPUThreadLoops.cpp index 7a678a92b5b4..9faf0d7a41df 100644 --- a/src/FuseGPUThreadLoops.cpp +++ b/src/FuseGPUThreadLoops.cpp @@ -28,9 +28,9 @@ using std::string; using std::vector; namespace { + string thread_names[] = {"__thread_id_x", "__thread_id_y", "__thread_id_z", "__thread_id_w"}; string block_names[] = {"__block_id_x", "__block_id_y", "__block_id_z", "__block_id_w"}; -} // namespace class ExtractBlockSize : public IRVisitor { Expr block_extent[4], block_count[4]; @@ -1543,11 +1543,15 @@ class ValidateGPULoopNesting : public IRVisitor { } }; +} // namespace + // Also used by InjectImageIntrinsics Stmt zero_gpu_loop_mins(const Stmt &s) { return ZeroGPULoopMins().mutate(s); } +namespace { + // Find the inner most GPU block of a statement. class FindInnermostGPUBlock : public IRVisitor { using IRVisitor::visit; @@ -1615,6 +1619,8 @@ class NormalizeIfStatements : public IRMutator { } }; +} // namespace + Stmt fuse_gpu_thread_loops(Stmt s) { ValidateGPULoopNesting validate; s.accept(&validate); diff --git a/src/IRMatch.cpp b/src/IRMatch.cpp index ba7f59b1ab83..d990f7c3b2dc 100644 --- a/src/IRMatch.cpp +++ b/src/IRMatch.cpp @@ -50,6 +50,8 @@ void expr_match_test() { std::cout << "expr_match test passed" << std::endl; } +namespace { + class IRMatch : public IRVisitor { public: bool result; @@ -294,6 +296,8 @@ class IRMatch : public IRVisitor { } }; +} // namespace + bool expr_match(const Expr &pattern, const Expr &expr, vector &matches) { matches.clear(); if (!pattern.defined() && !expr.defined()) { diff --git a/src/InjectOpenGLIntrinsics.cpp b/src/InjectOpenGLIntrinsics.cpp index 9319c211558a..1a96cb6bff35 100644 --- a/src/InjectOpenGLIntrinsics.cpp +++ b/src/InjectOpenGLIntrinsics.cpp @@ -12,6 +12,8 @@ namespace Internal { using std::string; using std::vector; +namespace { + /** Normalizes image loads/stores and produces glsl_texture_load/stores. */ class InjectOpenGLIntrinsics : public IRMutator { public: @@ -90,6 +92,8 @@ class InjectOpenGLIntrinsics : public IRMutator { } }; +} // namespace + Stmt inject_opengl_intrinsics(const Stmt &s) { InjectOpenGLIntrinsics gl; return gl.mutate(s); diff --git a/src/InlineReductions.cpp b/src/InlineReductions.cpp index 515cca1d09d5..10714a4a1358 100644 --- a/src/InlineReductions.cpp +++ b/src/InlineReductions.cpp @@ -15,6 +15,7 @@ using std::string; using std::vector; namespace Internal { +namespace { class FindFreeVars : public IRMutator { public: @@ -103,6 +104,8 @@ class FindFreeVars : public IRMutator { return expr; } }; + +} // namespace } // namespace Internal Expr sum(Expr e, const std::string &name) { diff --git a/src/Introspection.cpp b/src/Introspection.cpp index 5b34a2471cb9..18218b550496 100644 --- a/src/Introspection.cpp +++ b/src/Introspection.cpp @@ -51,8 +51,6 @@ inline T load_misaligned(const T *p) { typedef uint64_t llvm_offset_t; -} // namespace - class DebugSections { bool calibrated; @@ -2215,9 +2213,9 @@ class DebugSections { } }; -namespace { DebugSections *debug_sections = nullptr; -} + +} // namespace bool dump_stack_frame() { if (!debug_sections || !debug_sections->working) { diff --git a/src/LICM.cpp b/src/LICM.cpp index 20f4a73da4d9..64360fd64b9d 100644 --- a/src/LICM.cpp +++ b/src/LICM.cpp @@ -17,6 +17,8 @@ using std::set; using std::string; using std::vector; +namespace { + // Is it safe to lift an Expr out of a loop (and potentially across a device boundary) class CanLift : public IRVisitor { using IRVisitor::visit; @@ -527,6 +529,8 @@ class GroupLoopInvariants : public IRMutator { } }; +} // namespace + Stmt hoist_loop_invariant_values(Stmt s) { s = GroupLoopInvariants().mutate(s); s = common_subexpression_elimination(s); diff --git a/src/LLVM_Output.cpp b/src/LLVM_Output.cpp index 0adab9e01f41..88ddb8f7e87e 100644 --- a/src/LLVM_Output.cpp +++ b/src/LLVM_Output.cpp @@ -25,6 +25,8 @@ namespace Halide { namespace Internal { namespace Archive { +namespace { + // This is a bare-bones Windows .lib file writer, based on inspection // of the LLVM ArchiveWriter class and the documentation at // https://www.microsoft.com/msj/0498/hood0498.aspx and @@ -300,6 +302,8 @@ void write_coff_archive(std::ostream &out, } } +} // namespace + } // namespace Archive } // namespace Internal diff --git a/src/LLVM_Runtime_Linker.cpp b/src/LLVM_Runtime_Linker.cpp index 048d2495c757..d6216fb7b67a 100644 --- a/src/LLVM_Runtime_Linker.cpp +++ b/src/LLVM_Runtime_Linker.cpp @@ -25,8 +25,6 @@ std::unique_ptr parse_bitcode_file(llvm::StringRef buf, llvm::LLVM return result; } -} // namespace - #define DECLARE_INITMOD(mod) \ extern "C" unsigned char halide_internal_initmod_##mod[]; \ extern "C" int halide_internal_initmod_##mod##_length; \ @@ -259,8 +257,6 @@ DECLARE_CPP_INITMOD(riscv_cpu_features) DECLARE_NO_INITMOD(riscv_cpu_features) #endif // WITH_RISCV -namespace { - llvm::DataLayout get_data_layout_for_target(Target target) { if (target.arch == Target::X86) { if (target.bits == 32) { diff --git a/src/Memoization.cpp b/src/Memoization.cpp index 164452684bfb..64e41978cb2f 100644 --- a/src/Memoization.cpp +++ b/src/Memoization.cpp @@ -311,8 +311,6 @@ class KeyInfo { } }; -} // namespace - // Inject caching structure around memoized realizations. class InjectMemoization : public IRMutator { public: @@ -436,6 +434,8 @@ class InjectMemoization : public IRMutator { } }; +} // namespace + Stmt inject_memoization(const Stmt &s, const std::map &env, const std::string &name, const std::vector &outputs) { @@ -450,6 +450,8 @@ Stmt inject_memoization(const Stmt &s, const std::map &en return injector.mutate(s); } +namespace { + class RewriteMemoizedAllocations : public IRMutator { public: RewriteMemoizedAllocations(const std::map &e) @@ -538,6 +540,8 @@ class RewriteMemoizedAllocations : public IRMutator { } }; +} // namespace + Stmt rewrite_memoized_allocations(const Stmt &s, const std::map &env) { RewriteMemoizedAllocations rewriter(env); diff --git a/src/ModulusRemainder.cpp b/src/ModulusRemainder.cpp index 926cc37c2cd3..1e7d49aa3e04 100644 --- a/src/ModulusRemainder.cpp +++ b/src/ModulusRemainder.cpp @@ -9,6 +9,7 @@ namespace Halide { namespace Internal { namespace { + // A version of mod where a % 0 == a int64_t mod(int64_t a, int64_t b) { if (b == 0) { @@ -17,7 +18,6 @@ int64_t mod(int64_t a, int64_t b) { return mod_imp(a, b); } } -} // namespace class ComputeModulusRemainder : public IRVisitor { public: @@ -78,6 +78,207 @@ class ComputeModulusRemainder : public IRVisitor { void visit(const Atomic *) override; }; +void ComputeModulusRemainder::visit(const IntImm *op) { + // Equal to op->value modulo anything. We'll use zero as the + // modulus to mark this special case. We'd better be able to + // handle zero in the rest of the code... + result = {0, op->value}; +} + +void ComputeModulusRemainder::visit(const UIntImm *op) { + internal_error << "modulus_remainder of uint\n"; +} + +void ComputeModulusRemainder::visit(const FloatImm *) { + internal_error << "modulus_remainder of float\n"; +} + +void ComputeModulusRemainder::visit(const StringImm *) { + internal_error << "modulus_remainder of string\n"; +} + +void ComputeModulusRemainder::visit(const Cast *) { + // TODO: Could probably do something reasonable for integer + // upcasts and downcasts where the modulus is a power of two. + result = ModulusRemainder{}; +} + +void ComputeModulusRemainder::visit(const Variable *op) { + if (scope.contains(op->name)) { + result = scope.get(op->name); + } else { + result = ModulusRemainder{}; + } +} + +void ComputeModulusRemainder::visit(const Add *op) { + result = analyze(op->a) + analyze(op->b); +} + +void ComputeModulusRemainder::visit(const Sub *op) { + result = analyze(op->a) - analyze(op->b); +} + +void ComputeModulusRemainder::visit(const Mul *op) { + result = analyze(op->a) * analyze(op->b); +} + +void ComputeModulusRemainder::visit(const Div *op) { + result = analyze(op->a) / analyze(op->b); +} + +void ComputeModulusRemainder::visit(const Min *op) { + result = ModulusRemainder::unify(analyze(op->a), analyze(op->b)); +} + +void ComputeModulusRemainder::visit(const Max *op) { + result = ModulusRemainder::unify(analyze(op->a), analyze(op->b)); +} + +void ComputeModulusRemainder::visit(const EQ *) { + internal_error << "modulus_remainder of bool\n"; +} + +void ComputeModulusRemainder::visit(const NE *) { + internal_error << "modulus_remainder of bool\n"; +} + +void ComputeModulusRemainder::visit(const LT *) { + internal_error << "modulus_remainder of bool\n"; +} + +void ComputeModulusRemainder::visit(const LE *) { + internal_error << "modulus_remainder of bool\n"; +} + +void ComputeModulusRemainder::visit(const GT *) { + internal_error << "modulus_remainder of bool\n"; +} + +void ComputeModulusRemainder::visit(const GE *) { + internal_error << "modulus_remainder of bool\n"; +} + +void ComputeModulusRemainder::visit(const And *) { + internal_error << "modulus_remainder of bool\n"; +} + +void ComputeModulusRemainder::visit(const Or *) { + internal_error << "modulus_remainder of bool\n"; +} + +void ComputeModulusRemainder::visit(const Not *) { + internal_error << "modulus_remainder of bool\n"; +} + +void ComputeModulusRemainder::visit(const Select *op) { + result = ModulusRemainder::unify(analyze(op->true_value), + analyze(op->false_value)); +} + +void ComputeModulusRemainder::visit(const Load *) { + result = ModulusRemainder{}; +} + +void ComputeModulusRemainder::visit(const Ramp *) { + internal_error << "modulus_remainder of vector\n"; +} + +void ComputeModulusRemainder::visit(const Broadcast *) { + internal_error << "modulus_remainder of vector\n"; +} + +void ComputeModulusRemainder::visit(const Call *) { + result = ModulusRemainder{}; +} + +void ComputeModulusRemainder::visit(const Let *op) { + if (op->value.type().is_int()) { + ScopedBinding bind(scope, op->name, analyze(op->value)); + result = analyze(op->body); + } else { + result = analyze(op->body); + } +} + +void ComputeModulusRemainder::visit(const Shuffle *op) { + // It's possible that scalar expressions are extracting a lane of + // a vector - don't fail in this case, but stop + internal_assert(op->indices.size() == 1) << "modulus_remainder of vector\n"; + result = ModulusRemainder{}; +} + +void ComputeModulusRemainder::visit(const VectorReduce *op) { + internal_assert(op->type.is_scalar()) << "modulus_remainder of vector\n"; + result = ModulusRemainder{}; +} + +void ComputeModulusRemainder::visit(const LetStmt *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const AssertStmt *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const ProducerConsumer *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const For *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const Acquire *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const Store *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const Provide *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const Allocate *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const Realize *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const Block *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const Fork *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const Free *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const IfThenElse *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const Evaluate *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const Prefetch *) { + internal_error << "modulus_remainder of statement\n"; +} + +void ComputeModulusRemainder::visit(const Atomic *) { + internal_error << "modulus_remainder of statement\n"; +} + +} // namespace + ModulusRemainder modulus_remainder(const Expr &e) { ComputeModulusRemainder mr(nullptr); return mr.analyze(e); @@ -153,39 +354,6 @@ void modulus_remainder_test() { std::cout << "modulus_remainder test passed\n"; } -void ComputeModulusRemainder::visit(const IntImm *op) { - // Equal to op->value modulo anything. We'll use zero as the - // modulus to mark this special case. We'd better be able to - // handle zero in the rest of the code... - result = {0, op->value}; -} - -void ComputeModulusRemainder::visit(const UIntImm *op) { - internal_error << "modulus_remainder of uint\n"; -} - -void ComputeModulusRemainder::visit(const FloatImm *) { - internal_error << "modulus_remainder of float\n"; -} - -void ComputeModulusRemainder::visit(const StringImm *) { - internal_error << "modulus_remainder of string\n"; -} - -void ComputeModulusRemainder::visit(const Cast *) { - // TODO: Could probably do something reasonable for integer - // upcasts and downcasts where the modulus is a power of two. - result = ModulusRemainder{}; -} - -void ComputeModulusRemainder::visit(const Variable *op) { - if (scope.contains(op->name)) { - result = scope.get(op->name); - } else { - result = ModulusRemainder{}; - } -} - int64_t gcd(int64_t a, int64_t b) { if (a < b) { std::swap(a, b); @@ -210,10 +378,6 @@ int64_t lcm(int64_t a, int64_t b) { } } -void ComputeModulusRemainder::visit(const Add *op) { - result = analyze(op->a) + analyze(op->b); -} - ModulusRemainder operator+(const ModulusRemainder &a, const ModulusRemainder &b) { if (add_would_overflow(64, a.remainder, b.remainder)) { return {1, 0}; @@ -224,10 +388,6 @@ ModulusRemainder operator+(const ModulusRemainder &a, const ModulusRemainder &b) } } -void ComputeModulusRemainder::visit(const Sub *op) { - result = analyze(op->a) - analyze(op->b); -} - ModulusRemainder operator-(const ModulusRemainder &a, const ModulusRemainder &b) { if (sub_would_overflow(64, a.remainder, b.remainder)) { return {1, 0}; @@ -238,10 +398,6 @@ ModulusRemainder operator-(const ModulusRemainder &a, const ModulusRemainder &b) } } -void ComputeModulusRemainder::visit(const Mul *op) { - result = analyze(op->a) * analyze(op->b); -} - ModulusRemainder operator*(const ModulusRemainder &a, const ModulusRemainder &b) { if (a.modulus == 0) { // a is constant @@ -280,10 +436,6 @@ ModulusRemainder operator*(const ModulusRemainder &a, const ModulusRemainder &b) return ModulusRemainder{}; } -void ComputeModulusRemainder::visit(const Div *op) { - result = analyze(op->a) / analyze(op->b); -} - ModulusRemainder operator/(const ModulusRemainder &a, const ModulusRemainder &b) { // What can we say about: // floor((m1 * x + r1) / (m2 * y + r2)) @@ -423,155 +575,5 @@ ModulusRemainder operator%(const ModulusRemainder &a, int64_t b) { return a % ModulusRemainder(0, b); } -void ComputeModulusRemainder::visit(const Min *op) { - result = ModulusRemainder::unify(analyze(op->a), analyze(op->b)); -} - -void ComputeModulusRemainder::visit(const Max *op) { - result = ModulusRemainder::unify(analyze(op->a), analyze(op->b)); -} - -void ComputeModulusRemainder::visit(const EQ *) { - internal_error << "modulus_remainder of bool\n"; -} - -void ComputeModulusRemainder::visit(const NE *) { - internal_error << "modulus_remainder of bool\n"; -} - -void ComputeModulusRemainder::visit(const LT *) { - internal_error << "modulus_remainder of bool\n"; -} - -void ComputeModulusRemainder::visit(const LE *) { - internal_error << "modulus_remainder of bool\n"; -} - -void ComputeModulusRemainder::visit(const GT *) { - internal_error << "modulus_remainder of bool\n"; -} - -void ComputeModulusRemainder::visit(const GE *) { - internal_error << "modulus_remainder of bool\n"; -} - -void ComputeModulusRemainder::visit(const And *) { - internal_error << "modulus_remainder of bool\n"; -} - -void ComputeModulusRemainder::visit(const Or *) { - internal_error << "modulus_remainder of bool\n"; -} - -void ComputeModulusRemainder::visit(const Not *) { - internal_error << "modulus_remainder of bool\n"; -} - -void ComputeModulusRemainder::visit(const Select *op) { - result = ModulusRemainder::unify(analyze(op->true_value), - analyze(op->false_value)); -} - -void ComputeModulusRemainder::visit(const Load *) { - result = ModulusRemainder{}; -} - -void ComputeModulusRemainder::visit(const Ramp *) { - internal_error << "modulus_remainder of vector\n"; -} - -void ComputeModulusRemainder::visit(const Broadcast *) { - internal_error << "modulus_remainder of vector\n"; -} - -void ComputeModulusRemainder::visit(const Call *) { - result = ModulusRemainder{}; -} - -void ComputeModulusRemainder::visit(const Let *op) { - if (op->value.type().is_int()) { - ScopedBinding bind(scope, op->name, analyze(op->value)); - result = analyze(op->body); - } else { - result = analyze(op->body); - } -} - -void ComputeModulusRemainder::visit(const Shuffle *op) { - // It's possible that scalar expressions are extracting a lane of - // a vector - don't fail in this case, but stop - internal_assert(op->indices.size() == 1) << "modulus_remainder of vector\n"; - result = ModulusRemainder{}; -} - -void ComputeModulusRemainder::visit(const VectorReduce *op) { - internal_assert(op->type.is_scalar()) << "modulus_remainder of vector\n"; - result = ModulusRemainder{}; -} - -void ComputeModulusRemainder::visit(const LetStmt *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const AssertStmt *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const ProducerConsumer *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const For *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const Acquire *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const Store *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const Provide *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const Allocate *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const Realize *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const Block *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const Fork *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const Free *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const IfThenElse *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const Evaluate *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const Prefetch *) { - internal_error << "modulus_remainder of statement\n"; -} - -void ComputeModulusRemainder::visit(const Atomic *) { - internal_error << "modulus_remainder of statement\n"; -} - } // namespace Internal } // namespace Halide diff --git a/src/Monotonic.cpp b/src/Monotonic.cpp index 86617c42565d..66277e4e8354 100644 --- a/src/Monotonic.cpp +++ b/src/Monotonic.cpp @@ -28,6 +28,8 @@ std::ostream &operator<<(std::ostream &stream, const Monotonic &m) { using std::string; +namespace { + class MonotonicVisitor : public IRVisitor { const string &var; @@ -462,6 +464,8 @@ class MonotonicVisitor : public IRVisitor { } }; +} // namespace + Monotonic is_monotonic(const Expr &e, const std::string &var, const Scope &scope) { if (!e.defined()) { return Monotonic::Unknown; diff --git a/src/PrintLoopNest.cpp b/src/PrintLoopNest.cpp index 9ffaeacd9f13..19b0bf9ba926 100644 --- a/src/PrintLoopNest.cpp +++ b/src/PrintLoopNest.cpp @@ -27,6 +27,8 @@ using std::map; using std::string; using std::vector; +namespace { + class PrintLoopNest : public IRVisitor { public: PrintLoopNest(std::ostream &output, const map &e) @@ -158,6 +160,8 @@ class PrintLoopNest : public IRVisitor { } }; +} // namespace + string print_loop_nest(const vector &output_funcs) { // Do the first part of lowering: diff --git a/src/Profiling.cpp b/src/Profiling.cpp index 5644bc97b76b..7d1a47d5e66a 100644 --- a/src/Profiling.cpp +++ b/src/Profiling.cpp @@ -19,6 +19,8 @@ using std::map; using std::string; using std::vector; +namespace { + class InjectProfiling : public IRMutator { public: map indices; // maps from func name -> index in buffer. @@ -292,6 +294,8 @@ class InjectProfiling : public IRMutator { } }; +} // namespace + Stmt inject_profiling(Stmt s, const string &pipeline_name) { InjectProfiling profiling(pipeline_name); s = profiling.mutate(s); diff --git a/src/PurifyIndexMath.cpp b/src/PurifyIndexMath.cpp index 6d3f94c73886..1ea205a6ff6b 100644 --- a/src/PurifyIndexMath.cpp +++ b/src/PurifyIndexMath.cpp @@ -6,6 +6,8 @@ namespace Halide { namespace Internal { +namespace { + class PurifyIndexMath : public IRMutator { using IRMutator::visit; @@ -21,6 +23,8 @@ class PurifyIndexMath : public IRMutator { } }; +} // namespace + Expr purify_index_math(const Expr &s) { return PurifyIndexMath().mutate(s); } diff --git a/src/PythonExtensionGen.cpp b/src/PythonExtensionGen.cpp index acf07759e293..4faf3cbb44a7 100644 --- a/src/PythonExtensionGen.cpp +++ b/src/PythonExtensionGen.cpp @@ -13,7 +13,9 @@ using std::ostream; using std::ostringstream; using std::string; -static string sanitize_name(const string &name) { +namespace { + +string sanitize_name(const string &name) { ostringstream oss; for (size_t i = 0; i < name.size(); i++) { if (name[i] == '.' || name[i] == '_') { @@ -27,7 +29,7 @@ static string sanitize_name(const string &name) { return oss.str(); } -static string remove_namespaces(const string &name) { +string remove_namespaces(const string &name) { size_t i = name.find_last_of(':'); if (i == string::npos) { return name; @@ -36,7 +38,7 @@ static string remove_namespaces(const string &name) { } } -static bool can_convert(const LoweredArgument *arg) { +bool can_convert(const LoweredArgument *arg) { if (arg->type.is_handle()) { if (arg->name == "__user_context") { /* __user_context is a void* pointer to a user supplied memory region. @@ -95,6 +97,8 @@ std::pair print_type(const LoweredArgument *arg) { } } +} // namespace + void PythonExtensionGen::convert_buffer(const string &name, const LoweredArgument *arg) { internal_assert(arg->is_buffer()); internal_assert(arg->dimensions); diff --git a/src/Qualify.cpp b/src/Qualify.cpp index 9e32868bc1f9..470162909f94 100644 --- a/src/Qualify.cpp +++ b/src/Qualify.cpp @@ -6,6 +6,8 @@ namespace Internal { using std::string; +namespace { + // Prefix all names in an expression with some string. class QualifyExpr : public IRMutator { using IRMutator::visit; @@ -31,6 +33,8 @@ class QualifyExpr : public IRMutator { } }; +} // namespace + Expr qualify(const string &prefix, const Expr &value) { QualifyExpr q(prefix); return q.mutate(value); diff --git a/src/Random.cpp b/src/Random.cpp index f9c0cea35baa..6f9e4bfa3e92 100644 --- a/src/Random.cpp +++ b/src/Random.cpp @@ -93,6 +93,8 @@ Expr random_float(const vector &e) { return clamp(reinterpret(Float(32), result) - 1.0f, 0.0f, 1.0f); } +namespace { + class LowerRandom : public IRMutator { using IRMutator::visit; @@ -130,6 +132,8 @@ class LowerRandom : public IRMutator { } }; +} // namespace + Expr lower_random(const Expr &e, const vector &free_vars, int tag) { LowerRandom r(free_vars, tag); return r.mutate(e); diff --git a/src/RemoveDeadAllocations.cpp b/src/RemoveDeadAllocations.cpp index 090138f46337..6c998e036460 100644 --- a/src/RemoveDeadAllocations.cpp +++ b/src/RemoveDeadAllocations.cpp @@ -6,6 +6,8 @@ namespace Halide { namespace Internal { +namespace { + class RemoveDeadAllocations : public IRMutator { using IRMutator::visit; @@ -83,6 +85,8 @@ class RemoveDeadAllocations : public IRMutator { } }; +} // namespace + Stmt remove_dead_allocations(const Stmt &s) { return RemoveDeadAllocations().mutate(s); } diff --git a/src/RemoveExternLoops.cpp b/src/RemoveExternLoops.cpp index 84e932b0a2ec..9fb0e187b3eb 100644 --- a/src/RemoveExternLoops.cpp +++ b/src/RemoveExternLoops.cpp @@ -4,6 +4,8 @@ namespace Halide { namespace Internal { +namespace { + class RemoveExternLoops : public IRMutator { private: using IRMutator::visit; @@ -17,6 +19,8 @@ class RemoveExternLoops : public IRMutator { } }; +} // namespace + Stmt remove_extern_loops(const Stmt &s) { return RemoveExternLoops().mutate(s); } diff --git a/src/RemoveUndef.cpp b/src/RemoveUndef.cpp index 85ec06ae09ba..48b8ba8320ce 100644 --- a/src/RemoveUndef.cpp +++ b/src/RemoveUndef.cpp @@ -10,6 +10,8 @@ namespace Internal { using std::vector; +namespace { + class RemoveUndef : public IRMutator { public: Expr predicate; @@ -576,6 +578,8 @@ class RemoveUndef : public IRMutator { } }; +} // namespace + Stmt remove_undef(Stmt s) { RemoveUndef r; s = r.mutate(s); diff --git a/src/ScheduleFunctions.cpp b/src/ScheduleFunctions.cpp index f21044caa840..114516d9b1a8 100644 --- a/src/ScheduleFunctions.cpp +++ b/src/ScheduleFunctions.cpp @@ -31,6 +31,7 @@ using std::string; using std::vector; namespace { + // A structure representing a containing LetStmt, IfThenElse, or For // loop. Used in build_provide_loop_nest below. Both If and IfInner represent // IfThenElse stmts, however, IfInner should not be reordered to outside of @@ -57,8 +58,6 @@ bool var_name_match(const string &v1, const string &v2) { Internal::ends_with(v2, "." + v1)); } -} // anonymous namespace - class ContainsImpureCall : public IRVisitor { using IRVisitor::visit; @@ -2285,6 +2284,8 @@ bool group_should_be_inlined(const vector &funcs) { funcs[0].schedule().compute_level().is_inlined()); } +} // namespace + std::ostream &operator<<(std::ostream &out, const std::vector &v) { out << "{ "; for (size_t i = 0; i < v.size(); ++i) { diff --git a/src/ScheduleFunctions.h b/src/ScheduleFunctions.h index f513210d3f0c..11324f77283f 100644 --- a/src/ScheduleFunctions.h +++ b/src/ScheduleFunctions.h @@ -12,9 +12,11 @@ #include #include "Expr.h" -#include "Target.h" namespace Halide { + +struct Target; + namespace Internal { class Function; diff --git a/src/SelectGPUAPI.cpp b/src/SelectGPUAPI.cpp index 0957bbbd59c5..066b3a07b860 100644 --- a/src/SelectGPUAPI.cpp +++ b/src/SelectGPUAPI.cpp @@ -5,6 +5,8 @@ namespace Halide { namespace Internal { +namespace { + class SelectGPUAPI : public IRMutator { using IRMutator::visit; @@ -48,6 +50,8 @@ class SelectGPUAPI : public IRMutator { }; }; +} // namespace + Stmt select_gpu_api(const Stmt &s, Target t) { return SelectGPUAPI(t).mutate(s); } diff --git a/src/SkipStages.cpp b/src/SkipStages.cpp index e4b519c0ea02..d756d768d23f 100644 --- a/src/SkipStages.cpp +++ b/src/SkipStages.cpp @@ -39,8 +39,6 @@ bool extern_call_uses_buffer(const Call *op, const std::string &func) { return false; } -} // namespace - class PredicateFinder : public IRVisitor { public: Expr predicate; @@ -508,6 +506,8 @@ class MightBeSkippable : public IRVisitor { set candidates; }; +} // namespace + Stmt skip_stages(Stmt stmt, const vector &order) { // Don't consider the last stage, because it's the output, so it's // never skippable. diff --git a/src/SlidingWindow.cpp b/src/SlidingWindow.cpp index 38488d08eb03..e55848db5783 100644 --- a/src/SlidingWindow.cpp +++ b/src/SlidingWindow.cpp @@ -82,8 +82,6 @@ Expr expand_expr(const Expr &e, const Scope &scope) { return result; } -} // namespace - // Perform sliding window optimization for a function over a // particular serial for loop class SlidingWindowOnFunctionAndLoop : public IRMutator { @@ -423,6 +421,8 @@ class SlidingWindow : public IRMutator { } }; +} // namespace + Stmt sliding_window(const Stmt &s, const map &env) { return SlidingWindow(env).mutate(s); } diff --git a/src/StorageFolding.cpp b/src/StorageFolding.cpp index 01af13ce806e..5f7f2b14bf70 100644 --- a/src/StorageFolding.cpp +++ b/src/StorageFolding.cpp @@ -21,8 +21,6 @@ int64_t next_power_of_two(int64_t x) { return static_cast(1) << static_cast(std::ceil(std::log2(x))); } -} // namespace - using std::map; using std::string; using std::vector; @@ -998,6 +996,8 @@ class StorageFolding : public IRMutator { } }; +} // namespace + Stmt storage_folding(const Stmt &s, const std::map &env) { return StorageFolding(env).mutate(s); } diff --git a/src/StrictifyFloat.cpp b/src/StrictifyFloat.cpp index 7947eb00ebe8..161e549f7c17 100644 --- a/src/StrictifyFloat.cpp +++ b/src/StrictifyFloat.cpp @@ -7,6 +7,8 @@ namespace Halide { namespace Internal { +namespace { + class StrictifyFloat : public IRMutator { enum Strictness { FastMath, @@ -59,6 +61,8 @@ class StrictifyFloat : public IRMutator { } }; +} // namespace + bool strictify_float(std::map &env, const Target &t) { bool any_strict_float = false; for (auto &iter : env) { diff --git a/src/Substitute.cpp b/src/Substitute.cpp index 5e4ffb289683..e188842923fd 100644 --- a/src/Substitute.cpp +++ b/src/Substitute.cpp @@ -9,6 +9,8 @@ namespace Internal { using std::map; using std::string; +namespace { + class Substitute : public IRMutator { const map &replace; Scope<> hidden; @@ -83,6 +85,8 @@ class Substitute : public IRMutator { } }; +} // namespace + Expr substitute(const string &name, const Expr &replacement, const Expr &expr) { map m; m[name] = replacement; @@ -107,6 +111,8 @@ Stmt substitute(const map &m, const Stmt &stmt) { return s.mutate(stmt); } +namespace { + class SubstituteExpr : public IRMutator { public: Expr find, replacement; @@ -122,6 +128,8 @@ class SubstituteExpr : public IRMutator { } }; +} // namespace + Expr substitute(const Expr &find, const Expr &replacement, const Expr &expr) { SubstituteExpr s; s.find = find; @@ -136,6 +144,8 @@ Stmt substitute(const Expr &find, const Expr &replacement, const Stmt &stmt) { return s.mutate(stmt); } +namespace { + /** Substitute an expr for a var in a graph. */ class GraphSubstitute : public IRGraphMutator { string var; @@ -187,6 +197,8 @@ class GraphSubstituteExpr : public IRGraphMutator { } }; +} // namespace + Expr graph_substitute(const string &name, const Expr &replacement, const Expr &expr) { return GraphSubstitute(name, replacement).mutate(expr); } @@ -203,6 +215,8 @@ Stmt graph_substitute(const Expr &find, const Expr &replacement, const Stmt &stm return GraphSubstituteExpr(find, replacement).mutate(stmt); } +namespace { + class SubstituteInAllLets : public IRGraphMutator { using IRGraphMutator::visit; @@ -214,6 +228,8 @@ class SubstituteInAllLets : public IRGraphMutator { } }; +} // namespace + Expr substitute_in_all_lets(const Expr &expr) { return SubstituteInAllLets().mutate(expr); } diff --git a/src/Tracing.cpp b/src/Tracing.cpp index 1af6d2cec4ac..5e36a347bb30 100644 --- a/src/Tracing.cpp +++ b/src/Tracing.cpp @@ -15,6 +15,8 @@ using std::set; using std::string; using std::vector; +namespace { + struct TraceEventBuilder { string func; Expr trace_tag_expr = Expr(""); @@ -326,6 +328,8 @@ class RemoveRealizeOverOutput : public IRMutator { } }; +} // namespace + Stmt inject_tracing(Stmt s, const string &pipeline_name, bool trace_pipeline, const map &env, const vector &outputs, const Target &t) { diff --git a/src/UnifyDuplicateLets.cpp b/src/UnifyDuplicateLets.cpp index 87e0b1e08b8c..5f6e120d6e76 100644 --- a/src/UnifyDuplicateLets.cpp +++ b/src/UnifyDuplicateLets.cpp @@ -9,6 +9,8 @@ namespace Internal { using std::map; using std::string; +namespace { + class UnifyDuplicateLets : public IRMutator { using IRMutator::visit; @@ -112,6 +114,8 @@ class UnifyDuplicateLets : public IRMutator { } }; +} // namespace + Stmt unify_duplicate_lets(const Stmt &s) { return UnifyDuplicateLets().mutate(s); } diff --git a/src/UnrollLoops.cpp b/src/UnrollLoops.cpp index 5b503d780785..2b80f5b7cde6 100644 --- a/src/UnrollLoops.cpp +++ b/src/UnrollLoops.cpp @@ -12,6 +12,8 @@ using std::vector; namespace Halide { namespace Internal { +namespace { + class UnrollLoops : public IRMutator { using IRMutator::visit; @@ -113,6 +115,8 @@ class UnrollLoops : public IRMutator { } }; +} // namespace + Stmt unroll_loops(const Stmt &s) { return UnrollLoops().mutate(s); } diff --git a/src/VaryingAttributes.cpp b/src/VaryingAttributes.cpp index ffff2b398ceb..df9ebf94f6b1 100644 --- a/src/VaryingAttributes.cpp +++ b/src/VaryingAttributes.cpp @@ -12,6 +12,8 @@ namespace Halide { namespace Internal { +namespace { + Stmt make_block(Stmt first, Stmt rest) { if (first.defined() && rest.defined()) { return Block::make(first, rest); @@ -414,11 +416,15 @@ class FindLinearExpressions : public IRMutator { FindLinearExpressions() = default; }; +} // namespace + Stmt find_linear_expressions(const Stmt &s) { return FindLinearExpressions().mutate(s); } +namespace { + // This visitor produces a map containing name and expression pairs from varying // tagged intrinsics class FindVaryingAttributeTags : public IRVisitor { @@ -455,10 +461,14 @@ class RemoveVaryingAttributeTags : public IRMutator { } }; +} // namespace + Stmt remove_varying_attributes(const Stmt &s) { return RemoveVaryingAttributeTags().mutate(s); } +namespace { + // This visitor removes glsl_varying intrinsics and replaces them with // variables. After this visitor is called, the varying attribute expressions // will no longer appear in the IR tree, only variables with the .varying tag @@ -482,10 +492,14 @@ class ReplaceVaryingAttributeTags : public IRMutator { } }; +} // namespace + Stmt replace_varying_attributes(const Stmt &s) { return ReplaceVaryingAttributeTags().mutate(s); } +namespace { + // This visitor produces a set of variable names that are tagged with // ".varying". class FindVaryingAttributeVars : public IRVisitor { @@ -501,6 +515,8 @@ class FindVaryingAttributeVars : public IRVisitor { std::set variables; }; +} // namespace + // Remove varying attributes from the varying's map if they do not appear in the // loop_stmt because they were simplified away. void prune_varying_attributes(const Stmt &loop_stmt, std::map &varying) { @@ -522,6 +538,8 @@ void prune_varying_attributes(const Stmt &loop_stmt, std::map } } +namespace { + // This visitor changes the type of variables tagged with .varying to float, // since GLSL will only interpolate floats. In the case that the type of the // varying attribute was integer, the interpolated float value is snapped to the @@ -808,7 +826,6 @@ Stmt IRFilter::mutate(const Stmt &s) { return stmt; } -namespace { template void mutate_operator(IRFilter *mutator, const T *op, const A op_a, Stmt *stmt) { Stmt a = mutator->mutate(op_a); @@ -827,7 +844,6 @@ void mutate_operator(IRFilter *mutator, const T *op, const A op_a, const B op_b, Stmt c = mutator->mutate(op_c); *stmt = make_block(make_block(a, b), c); } -} // namespace void IRFilter::visit(const IntImm *op) { stmt = Stmt(); @@ -1362,6 +1378,8 @@ class CreateVertexBufferHostLoops : public IRMutator { } }; +} // namespace + Stmt setup_gpu_vertex_buffer(const Stmt &s) { CreateVertexBufferHostLoops vb; return vb.mutate(s); From b48606f15799a8f5c694ad0f1ba64d1e0304cfb5 Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Fri, 11 Dec 2020 10:54:57 -0700 Subject: [PATCH 2/3] clang-format --- src/CodeGen_OpenGLCompute_Dev.cpp | 2 +- src/CodeGen_PTX_Dev.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/CodeGen_OpenGLCompute_Dev.cpp b/src/CodeGen_OpenGLCompute_Dev.cpp index 7e5033668f88..a491c1cc5c60 100644 --- a/src/CodeGen_OpenGLCompute_Dev.cpp +++ b/src/CodeGen_OpenGLCompute_Dev.cpp @@ -1,6 +1,6 @@ +#include "CodeGen_OpenGLCompute_Dev.h" #include "CodeGen_GPU_Dev.h" #include "CodeGen_OpenGL_Dev.h" -#include "CodeGen_OpenGLCompute_Dev.h" #include "Debug.h" #include "Deinterleave.h" #include "IRMatch.h" diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index d0032581008b..be2b2c04000e 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -1,8 +1,8 @@ -#include "CodeGen_GPU_Dev.h" -#include "CodeGen_LLVM.h" #include "CodeGen_PTX_Dev.h" #include "CSE.h" +#include "CodeGen_GPU_Dev.h" #include "CodeGen_Internal.h" +#include "CodeGen_LLVM.h" #include "Debug.h" #include "ExprUsesVar.h" #include "IREquality.h" From 7fd118494c1e8f8088124af0070e9f5b851190d2 Mon Sep 17 00:00:00 2001 From: Dillon Sharlet Date: Fri, 11 Dec 2020 11:12:45 -0700 Subject: [PATCH 3/3] Remove redundant static. --- src/CodeGen_D3D12Compute_Dev.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CodeGen_D3D12Compute_Dev.cpp b/src/CodeGen_D3D12Compute_Dev.cpp index 3335bf0ca482..afef3020264e 100644 --- a/src/CodeGen_D3D12Compute_Dev.cpp +++ b/src/CodeGen_D3D12Compute_Dev.cpp @@ -754,7 +754,7 @@ void CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::visit(const Select *op) { print_assignment(op->type, rhs.str()); } -static bool is_shared_allocation(const Allocate *op) { +bool is_shared_allocation(const Allocate *op) { return op->memory_type == MemoryType::GPUShared; }