Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/AddImageChecks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ using std::pair;
using std::string;
using std::vector;

namespace {

/* Find all the externally referenced buffers in a stmt */
class FindBuffers : public IRGraphVisitor {
public:
Expand Down Expand Up @@ -710,6 +712,8 @@ Stmt add_image_checks_inner(Stmt s,
return s;
}

} // namespace

// The following function repeats the arguments list it just passes
// through six times. Surely there is a better way?
Stmt add_image_checks(const Stmt &s,
Expand Down
4 changes: 4 additions & 0 deletions src/AddParameterChecks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ using std::pair;
using std::string;
using std::vector;

namespace {

// Find all the externally referenced scalar parameters
class FindParameters : public IRGraphVisitor {
public:
Expand All @@ -26,6 +28,8 @@ class FindParameters : public IRGraphVisitor {
}
};

} // namespace

// Insert checks to make sure that parameters are within their
// declared range.
Stmt add_parameter_checks(const vector<Stmt> &preconditions, Stmt s, const Target &t) {
Expand Down
4 changes: 4 additions & 0 deletions src/AllocationBoundsInference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ using std::set;
using std::string;
using std::vector;

namespace {

// Figure out the region touched in each buffer, and deposit those regions
// as let statements outside of each realize node, or at the top level if
// they're not internal allocations.
Expand Down Expand Up @@ -154,6 +156,8 @@ class StripDeclareBoxTouched : public IRMutator {
}
};

} // namespace

Stmt allocation_bounds_inference(Stmt s,
const map<string, Function> &env,
const FuncValueBounds &fb) {
Expand Down
4 changes: 4 additions & 0 deletions src/AsyncProducers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ using std::set;
using std::string;
using std::vector;

namespace {

/** A mutator which eagerly folds no-op stmts */
class NoOpCollapsingMutator : public IRMutator {
protected:
Expand Down Expand Up @@ -651,6 +653,8 @@ class TightenForkNodes : public IRMutator {

// TODO: merge semaphores?

} // namespace

Stmt fork_async_producers(Stmt s, const map<string, Function> &env) {
s = TightenProducerConsumerNodes(env).mutate(s);
s = ForkAsyncProducers(env).mutate(s);
Expand Down
4 changes: 4 additions & 0 deletions src/BoundSmallAllocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
namespace Halide {
namespace Internal {

namespace {

// Find a constant upper bound on the size of each thread-local allocation
class BoundSmallAllocations : public IRMutator {
using IRMutator::visit;
Expand Down Expand Up @@ -147,6 +149,8 @@ class BoundSmallAllocations : public IRMutator {
}
};

} // namespace

// Entry point for this pass: runs a fresh BoundSmallAllocations mutator
// over the given statement and returns the mutated statement.
Stmt bound_small_allocations(const Stmt &s) {
    BoundSmallAllocations mutator;
    return mutator.mutate(s);
}
Expand Down
8 changes: 8 additions & 0 deletions src/Bounds.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ std::ostream &operator<<(std::ostream &stream, const Box &b) {
return stream;
}

namespace {

class Bounds : public IRVisitor {
public:
Interval interval;
Expand Down Expand Up @@ -1577,6 +1579,8 @@ class Bounds : public IRVisitor {
}
};

} // namespace

Interval bounds_of_expr_in_scope(const Expr &expr, const Scope<Interval> &scope, const FuncValueBounds &fb, bool const_bound) {
//debug(3) << "computing bounds_of_expr_in_scope " << expr << "\n";
Bounds b(&scope, fb, const_bound);
Expand Down Expand Up @@ -1782,6 +1786,8 @@ bool box_contains(const Box &outer, const Box &inner) {
return can_prove(condition);
}

namespace {

class FindInnermostVar : public IRVisitor {
public:
const Scope<int> &vars_depth;
Expand Down Expand Up @@ -2630,6 +2636,8 @@ class BoxesTouched : public IRGraphVisitor {
}
};

} // namespace

map<string, Box> boxes_touched(const Expr &e, Stmt s, bool consider_calls, bool consider_provides,
const string &fn, const Scope<Interval> &scope, const FuncValueBounds &fb) {
if (!fn.empty() && s.defined()) {
Expand Down
3 changes: 2 additions & 1 deletion src/BoundsInference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,6 @@ bool is_fused_with_others(const vector<vector<Function>> &fused_groups,
}
return false;
}
} // namespace

class BoundsInference : public IRMutator {
public:
Expand Down Expand Up @@ -1255,6 +1254,8 @@ class BoundsInference : public IRMutator {
}
};

} // namespace

Stmt bounds_inference(Stmt s,
const vector<Function> &outputs,
const vector<string> &order,
Expand Down
3 changes: 2 additions & 1 deletion src/CodeGen_C.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,6 @@ class HalideFreeHelper {
};
} // namespace
)INLINE_CODE";
} // namespace

class TypeInfoGatherer : public IRGraphVisitor {
private:
Expand Down Expand Up @@ -311,6 +310,8 @@ class TypeInfoGatherer : public IRGraphVisitor {
std::set<Type> vector_types_used;
};

} // namespace

CodeGen_C::CodeGen_C(ostream &s, Target t, OutputKind output_kind, const std::string &guard)
: IRPrinter(s), id("$$ BAD ID $$"), target(t), output_kind(output_kind),
extern_c_open(false), inside_atomic_mutex_node(false), emit_atomic_stores(false), using_vector_typedefs(false) {
Expand Down
96 changes: 95 additions & 1 deletion src/CodeGen_D3D12Compute_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
#include <sstream>
#include <utility>

#include "CodeGen_C.h"
#include "CodeGen_D3D12Compute_Dev.h"
#include "CodeGen_GPU_Dev.h"
#include "CodeGen_Internal.h"
#include "Debug.h"
#include "DeviceArgument.h"
Expand All @@ -23,6 +25,92 @@ using std::vector;

static ostringstream nil;

namespace {

class CodeGen_D3D12Compute_Dev : public CodeGen_GPU_Dev {
public:
CodeGen_D3D12Compute_Dev(Target target);

/** Compile a GPU kernel into the module. This may be called many times
* with different kernels, which will all be accumulated into a single
* source module shared by a given Halide pipeline. */
void add_kernel(Stmt stmt,
const std::string &name,
const std::vector<DeviceArgument> &args) override;

/** (Re)initialize the GPU kernel module. This is separate from compile,
* since a GPU device module will often have many kernels compiled into it
* for a single pipeline. */
void init_module() override;

std::vector<char> compile_to_src() override;

std::string get_current_kernel_name() override;

void dump() override;

std::string print_gpu_name(const std::string &name) override;

std::string api_unique_name() override {
return "d3d12compute";
}

protected:
friend struct StoragePackUnpack;

class CodeGen_D3D12Compute_C : public CodeGen_C {
public:
CodeGen_D3D12Compute_C(std::ostream &s, Target t)
: CodeGen_C(s, t) {
integer_suffix_style = IntegerSuffixStyle::HLSL;
}
void add_kernel(Stmt stmt,
const std::string &name,
const std::vector<DeviceArgument> &args);

protected:
friend struct StoragePackUnpack;

std::string print_type(Type type, AppendSpaceIfNeeded space_option = DoNotAppendSpace) override;
std::string print_storage_type(Type type);
std::string print_type_maybe_storage(Type type, bool storage, AppendSpaceIfNeeded space);
std::string print_reinterpret(Type type, const Expr &e) override;
std::string print_extern_call(const Call *op) override;

std::string print_vanilla_cast(Type type, const std::string &value_expr);
std::string print_reinforced_cast(Type type, const std::string &value_expr);
std::string print_cast(Type target_type, Type source_type, const std::string &value_expr);
std::string print_reinterpret_cast(Type type, const std::string &value_expr);

std::string print_assignment(Type t, const std::string &rhs) override;

using CodeGen_C::visit;
void visit(const Evaluate *op) override;
void visit(const Min *) override;
void visit(const Max *) override;
void visit(const Div *) override;
void visit(const Mod *) override;
void visit(const For *) override;
void visit(const Ramp *op) override;
void visit(const Broadcast *op) override;
void visit(const Call *op) override;
void visit(const Load *op) override;
void visit(const Store *op) override;
void visit(const Select *op) override;
void visit(const Allocate *op) override;
void visit(const Free *op) override;
void visit(const Cast *op) override;
void visit(const Atomic *op) override;
void visit(const FloatImm *op) override;

Scope<> groupshared_allocations;
};

std::ostringstream src_stream;
std::string cur_kernel_name;
CodeGen_D3D12Compute_C d3d12compute_c;
};

// Construct the device code generator for target `t`. The nested printer
// d3d12compute_c is constructed on the src_stream member; this is safe
// because src_stream is declared before d3d12compute_c in the class and
// members are initialized in declaration order.
CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_Dev(Target t)
: d3d12compute_c(src_stream, t) {
}
Expand Down Expand Up @@ -666,7 +754,7 @@ void CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::visit(const Select *op) {
print_assignment(op->type, rhs.str());
}

static bool is_shared_allocation(const Allocate *op) {
bool is_shared_allocation(const Allocate *op) {
return op->memory_type == MemoryType::GPUShared;
}

Expand Down Expand Up @@ -1279,5 +1367,11 @@ std::string CodeGen_D3D12Compute_Dev::print_gpu_name(const std::string &name) {
return name;
}

} // namespace

// Factory for the file-local D3D12Compute code generator: heap-allocates a
// CodeGen_D3D12Compute_Dev for `target` and returns it through its
// CodeGen_GPU_Dev base pointer. The object is not freed here.
// NOTE(review): ownership presumably passes to the caller -- confirm.
CodeGen_GPU_Dev *new_CodeGen_D3D12Compute_Dev(const Target &target) {
    CodeGen_GPU_Dev *codegen = new CodeGen_D3D12Compute_Dev(target);
    return codegen;
}

} // namespace Internal
} // namespace Halide
91 changes: 4 additions & 87 deletions src/CodeGen_D3D12Compute_Dev.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,98 +5,15 @@
* Defines the code-generator for producing D3D12-compatible HLSL kernel code
*/

#include <sstream>

#include "CodeGen_C.h"
#include "CodeGen_GPU_Dev.h"
#include "Target.h"

namespace Halide {
namespace Internal {

class CodeGen_D3D12Compute_Dev : public CodeGen_GPU_Dev {
public:
CodeGen_D3D12Compute_Dev(Target target);

/** Compile a GPU kernel into the module. This may be called many times
* with different kernels, which will all be accumulated into a single
* source module shared by a given Halide pipeline. */
void add_kernel(Stmt stmt,
const std::string &name,
const std::vector<DeviceArgument> &args) override;

/** (Re)initialize the GPU kernel module. This is separate from compile,
* since a GPU device module will often have many kernels compiled into it
* for a single pipeline. */
void init_module() override;

std::vector<char> compile_to_src() override;

std::string get_current_kernel_name() override;

void dump() override;

std::string print_gpu_name(const std::string &name) override;
struct Target;

std::string api_unique_name() override {
return "d3d12compute";
}

protected:
friend struct StoragePackUnpack;

class CodeGen_D3D12Compute_C : public CodeGen_C {
public:
CodeGen_D3D12Compute_C(std::ostream &s, Target t)
: CodeGen_C(s, t) {
integer_suffix_style = IntegerSuffixStyle::HLSL;
}
void add_kernel(Stmt stmt,
const std::string &name,
const std::vector<DeviceArgument> &args);

protected:
friend struct StoragePackUnpack;

std::string print_type(Type type, AppendSpaceIfNeeded space_option = DoNotAppendSpace) override;
std::string print_storage_type(Type type);
std::string print_type_maybe_storage(Type type, bool storage, AppendSpaceIfNeeded space);
std::string print_reinterpret(Type type, const Expr &e) override;
std::string print_extern_call(const Call *op) override;

std::string print_vanilla_cast(Type type, const std::string &value_expr);
std::string print_reinforced_cast(Type type, const std::string &value_expr);
std::string print_cast(Type target_type, Type source_type, const std::string &value_expr);
std::string print_reinterpret_cast(Type type, const std::string &value_expr);

std::string print_assignment(Type t, const std::string &rhs) override;

using CodeGen_C::visit;
void visit(const Evaluate *op) override;
void visit(const Min *) override;
void visit(const Max *) override;
void visit(const Div *) override;
void visit(const Mod *) override;
void visit(const For *) override;
void visit(const Ramp *op) override;
void visit(const Broadcast *op) override;
void visit(const Call *op) override;
void visit(const Load *op) override;
void visit(const Store *op) override;
void visit(const Select *op) override;
void visit(const Allocate *op) override;
void visit(const Free *op) override;
void visit(const Cast *op) override;
void visit(const Atomic *op) override;
void visit(const FloatImm *op) override;
namespace Internal {

Scope<> groupshared_allocations;
};
struct CodeGen_GPU_Dev;

std::ostringstream src_stream;
std::string cur_kernel_name;
CodeGen_D3D12Compute_C d3d12compute_c;
};
CodeGen_GPU_Dev *new_CodeGen_D3D12Compute_Dev(const Target &target);

} // namespace Internal
} // namespace Halide
Expand Down
Loading