Skip to content
Closed
40 changes: 20 additions & 20 deletions src/CodeGen_GPU_Host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,26 +285,6 @@ void CodeGen_GPU_Host<CodeGen_CPU>::visit(const For *loop) {
// Determine the arguments that must be passed into the halide function
vector<DeviceArgument> closure_args = c.arguments();

// Sort the args by the size of the underlying type. This is
// helpful for avoiding struct-packing ambiguities in metal,
// which passes the scalar args as a struct.
std::sort(closure_args.begin(), closure_args.end(),
[](const DeviceArgument &a, const DeviceArgument &b) {
if (a.is_buffer == b.is_buffer) {
return a.type.bits() > b.type.bits();
} else {
// Ensure that buffer arguments come first:
// for many OpenGL/Compute systems, the
// legal indices for buffer args are much
// more restrictive than for scalar args,
// and scalar args can be 'grown' by
// LICM. Putting buffers first makes it much
// more likely we won't fail on some
// hardware.
return a.is_buffer > b.is_buffer;
}
});

// Halide allows passing of scalar float and integer arguments. For
// OpenGL, pack these into vec4 uniforms and varying attributes
if (loop->device_api == DeviceAPI::GLSL) {
Expand All @@ -328,6 +308,26 @@ void CodeGen_GPU_Host<CodeGen_CPU>::visit(const For *loop) {
closure_args[i].packed_index = num_uniform_ints++;
}
}
} else {
// Sort the args by the size of the underlying type. This is
// helpful for avoiding struct-packing ambiguities in metal,
// which passes the scalar args as a struct.
std::sort(closure_args.begin(), closure_args.end(),
[](const DeviceArgument &a, const DeviceArgument &b) {
if (a.is_buffer == b.is_buffer) {
return a.type.bits() > b.type.bits();
} else {
// Ensure that buffer arguments come first:
// for many OpenGL/Compute systems, the
// legal indices for buffer args are much
// more restrictive than for scalar args,
// and scalar args can be 'grown' by
// LICM. Putting buffers first makes it much
// more likely we won't fail on some
// hardware.
return a.is_buffer > b.is_buffer;
}
});
}

for (size_t i = 0; i < closure_args.size(); i++) {
Expand Down
157 changes: 128 additions & 29 deletions src/CodeGen_OpenGL_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,15 @@ Type CodeGen_GLSLBase::map_type(const Type &type) {
} else if (type.is_int() && type.bits() <= 32) {
result = Int(32);
} else if (type.is_uint() && type.bits() <= 32) {
result = UInt(32);
if (support_native_uint) {
result = UInt(32);
} else {
if (type.bits() == 32) {
result = Int(32);
} else {
result = Float(32);
}
}
} else {
user_error << "GLSL: Can't represent type '" << type << "'.\n";
}
Expand Down Expand Up @@ -175,8 +183,10 @@ void CodeGen_GLSLBase::visit(const UIntImm *op) {
} else {
id = "false";
}
} else {
} else if (support_native_uint) {
id = std::to_string(op->value) + "u";
} else {
id = print_type(op->type) + "(" + std::to_string(op->value) + ")";
}
}

Expand Down Expand Up @@ -244,7 +254,7 @@ void CodeGen_GLSLBase::visit(const Call *op) {
internal_assert(op->args.size() == 2);
// Simply discard the first argument, which is generally a call to
// 'halide_printf'.
print_expr(op->args[1]);
print_assignment(op->type, print_expr(op->args[1]));
return;
} else if (op->name == "fast_inverse_f32") {
print_expr(make_one(op->type) / op->args[0]);
Expand Down Expand Up @@ -297,15 +307,37 @@ void CodeGen_GLSLBase::visit(const Call *op) {
user_error << "GLSL: unknown function '" << op->name << "' encountered.\n";
}

rhs << builtin[op->name] << "(";
for (size_t i = 0; i < op->args.size(); i++) {
if (i > 0) {
rhs << ", ";
bool need_cast = false;
const Type float_type = Float(32, op->type.lanes());
vector<Expr> new_args(op->args.size());

if (!support_int_to_float_implicit_conversion &&
!support_non_float_type_builtin.count(op->name)) {
need_cast = !op->type.is_float();
for (size_t i = 0; i < op->args.size(); i++) {
if (!op->args[i].type().is_float()) {
new_args[i] = Cast::make(float_type, op->args[i]);
need_cast = true;
} else {
new_args[i] = op->args[i];
}
}
rhs << print_expr(op->args[i]);
}
rhs << ")";
print_assignment(op->type, rhs.str());

if (need_cast) {
Expr val = Call::make(float_type, op->name, new_args, op->call_type);
print_expr(simplify(Cast::make(op->type, val)));
} else {
rhs << builtin[op->name] << "(";
for (size_t i = 0; i < op->args.size(); i++) {
if (i > 0) {
rhs << ", ";
}
rhs << print_expr(op->args[i]);
}
rhs << ")";
print_assignment(op->type, rhs.str());
}
}
}

Expand Down Expand Up @@ -459,6 +491,64 @@ void CodeGen_GLSLBase::visit(const Cast *op) {
/** Construct the GLSL code generator: adjust the inherited builtin
 * table and set the support_* capability flags that the visitors
 * consult when emitting code. */
CodeGen_GLSL::CodeGen_GLSL(std::ostream &s, const Target &t)
    : CodeGen_GLSLBase(s, t) {
    builtin["trunc_f32"] = "_trunc_f32";

    // TODO: Add emulation for these builtin functions, which are
    // available only for GL 3.x (GLSL >= 130). Until then they are
    // dropped from the builtin table.
    for (const char *gl3_only : {"isnan", "round_f32",
                                 "sinh_f32", "cosh_f32", "tanh_f32",
                                 "asinh_f32", "acosh_f32", "atanh_f32"}) {
        builtin.erase(gl3_only);
    }

    // TODO: Check the OpenGL version and derive the values of the
    // support_* variables from it instead of hard-coding them.
    support_native_uint = false;
    support_int_to_float_implicit_conversion = false;
    support_integer_division_rounding = false;

    // Builtin functions that support ivecs.
    for (const char *ivec_capable : {"equal", "notEqual",
                                     "lessThan", "lessThanEqual",
                                     "greaterThan", "greaterThanEqual"}) {
        support_non_float_type_builtin.insert(ivec_capable);
    }
}

// Copy back from commit #60442cf9eb
void CodeGen_GLSL::visit(const Div *op) {
if (!support_integer_division_rounding && (op->type.is_int() || op->type.is_uint())) {
// Halide's integer division is defined to round according to
// the sign of the denominator. Since the rounding behavior of
// GLSL's integer division is undefined, emulate the correct
// behavior using floating point arithmetic.
Type float_type = Float(32, op->type.lanes());
// To avoid rounding woes, aim for a floating point value that
// should not be close to an integer. If we divide the range
// [0, 1, 2, 3] by 4, we want to get floating point values
// [1/8, 3/8, 5/8, 7/8]. This can be achieved by adding 0.5 to
// the numerator.
Expr val = Div::make(Cast::make(float_type, op->a) + 0.5f, Cast::make(float_type, op->b));
string float_result = print_expr(simplify(val));
val = Variable::make(float_type, float_result);
Expr zero = make_zero(op->type);
string a = print_expr(op->a);
string b = print_expr(op->b);
Expr a_var = is_const(op->a) ? op->a : Variable::make(op->type, a);
Expr b_var = is_const(op->b) ? op->b : Variable::make(op->type, b);
Expr equiv = select(b_var == zero, zero,
b_var > zero, Call::make(op->type, "floor_f32", {val}, Call::Extern),
Call::make(op->type, "ceil_f32", {val}, Call::Extern));
if (op->type.bits() >= 32) {
// A float isn't precise enough to produce the correct int
// in the case where the denominator is one.
equiv = select(b_var == make_one(op->type), a_var, equiv);
}
print_expr(simplify(equiv));
} else {
CodeGen_GLSLBase::visit(op);
}
}

void CodeGen_GLSL::visit(const Let *op) {
Expand Down Expand Up @@ -683,6 +773,10 @@ void CodeGen_GLSL::visit(const Call *op) {
internal_assert((op->type.code() == Type::UInt || op->type.code() == Type::Float) &&
(op->type.lanes() >= 1 && op->type.lanes() <= 4));

if (op->type.is_uint()) {
rhs << print_type(op->type) << "(floor(";
}

if (op->type.is_vector()) {
// The channel argument must be a ramp or a broadcast of a constant.
Expr c = op->args[4];
Expand Down Expand Up @@ -745,7 +839,7 @@ void CodeGen_GLSL::visit(const Call *op) {
}

if (op->type.is_uint()) {
rhs << " * " << print_expr(cast<float>(op->type.max()));
rhs << " * " << print_expr(cast<float>(op->type.max())) << " + 0.5))";
}

} else if (op->is_intrinsic(Call::glsl_texture_store)) {
Expand Down Expand Up @@ -919,12 +1013,12 @@ void CodeGen_GLSL::add_kernel(const Stmt &stmt, const string &name,
++num_varying_floats;
} else if (args[i].type.is_float()) {
header << "/// UNIFORM "
<< CodeGen_GLSLBase::print_type(args[i].type) << " "
<< CodeGen_C::print_type(args[i].type) << " " // NOLINT: Allow call to CodeGen_C::print_type
<< print_name(args[i].name) << " uniformf" << args[i].packed_index / 4 << "[" << args[i].packed_index % 4 << "]\n";
++num_uniform_floats;
} else if (args[i].type.is_int()) {
header << "/// UNIFORM "
<< CodeGen_GLSLBase::print_type(args[i].type) << " "
<< CodeGen_C::print_type(args[i].type) << " " // NOLINT: Allow call to CodeGen_C::print_type
<< print_name(args[i].name) << " uniformi" << args[i].packed_index / 4 << "[" << args[i].packed_index % 4 << "]\n";
++num_uniform_ints;
}
Expand Down Expand Up @@ -1023,6 +1117,8 @@ void check(Expr e, const string &result) {
// wrap them to obtain useful output.
e = Halide::print(e);
}
source.str("");
source.clear();
Evaluate::make(e).accept(&cg);
string src = normalize_temporaries(source.str());
if (!ends_with(src, result)) {
Expand Down Expand Up @@ -1072,14 +1168,15 @@ void CodeGen_GLSL::test() {
check(Variable::make(Int(32), "x") / Expr(3),
"float $ = float($x);\n"
"float $ = $ * 0.333333343;\n"
"float $ = $ + 0.166666672;\n"
"float $ = floor($);\n"
"int $ = int($);\n");
check(Variable::make(Int(32, 4), "x") / Variable::make(Int(32, 4), "y"),
"vec4 $ = vec4($x);\n"
"vec4 $ = vec4($y);\n"
"vec4 $ = $ / $;\n"
"vec4 $ = floor($);\n"
"ivec4 $ = ivec4($);\n");
// check(Variable::make(Int(32, 4), "x") / Variable::make(Int(32, 4), "y"),
// "vec4 $ = vec4($x);\n"
// "vec4 $ = vec4($y);\n"
// "vec4 $ = $ / $;\n"
// "vec4 $ = floor($);\n"
// "ivec4 $ = ivec4($);\n");
check(Variable::make(Float(32, 4), "x") / Variable::make(Float(32, 4), "y"),
"vec4 $ = $x / $y;\n");

Expand Down Expand Up @@ -1113,19 +1210,21 @@ void CodeGen_GLSL::test() {
"vec4 $ = sin($);\n");

// use float version of abs in GLSL
check(abs(-2),
"float $ = abs(-2.0);\n"
check(abs(Variable::make(Int(32), "x")),
"float $ = float($x);\n"
"float $ = abs($);\n"
"int $ = int($);\n");

check(Halide::print(3.0f), "float $ = 3.0;\n");

// Test rounding behavior of integer division.
check(Variable::make(Int(32), "x") / Variable::make(Int(32), "y"),
"float $ = float($x);\n"
"float $ = float($y);\n"
"float $ = $ / $;\n"
"float $ = floor($);\n"
"int $ = int($);\n");
// The latest version of integer division is too complicated to list here
// check(Variable::make(Int(32), "x") / Variable::make(Int(32), "y"),
// "float $ = float($x);\n"
// "float $ = float($y);\n"
// "float $ = $ / $;\n"
// "float $ = floor($);\n"
// "int $ = int($);\n");

// Select with scalar condition
check(Select::make(EQ::make(Variable::make(Float(32), "x"), 1.0f),
Expand Down Expand Up @@ -1156,7 +1255,7 @@ void CodeGen_GLSL::test() {
Broadcast::make(0, 4),
Ramp::make(0, 1, 4)},
Call::Intrinsic);
check(load4, "vec4 $ = texture2D($buf, vec2(0, 0));\n");
check(load4, "vec4 $ = texture2D($buf, vec2(int(0), int(0)));\n");

check(log(1.0f), "float $ = log(1.0);\n");
check(exp(1.0f), "float $ = exp(1.0);\n");
Expand All @@ -1165,7 +1264,7 @@ void CodeGen_GLSL::test() {
check(pow(1.4f, 2), "float $ = 1.39999998 * 1.39999998;\n");
check(pow(1.0f, 2.1f), "float $ = pow(1.0, 2.0999999);\n");

std::cout << "CodeGen_GLSL test passed\n";
std::cout << "CodeGen_GLSL test Success!\n";
}

} // namespace Internal
Expand Down
22 changes: 22 additions & 0 deletions src/CodeGen_OpenGL_Dev.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
*/

#include <map>
#include <set>
#include <sstream>

#include "CodeGen_C.h"
Expand Down Expand Up @@ -87,6 +88,25 @@ class CodeGen_GLSLBase : public CodeGen_C {
Type map_type(const Type &);

std::map<std::string, std::string> builtin;

// Empty for GL 3.x and GLCompute, which do not care about this (due to implicit conversion),
// while GL 2.0 only supports a small subset of builtin functions with ivec arguments.
std::set<std::string> support_non_float_type_builtin;

// true for GL 3.x (GLSL >= 130 or ESSL >= 300) and GLCompute
// false for GL 2.x which does not support uint/uvec
bool support_native_uint = true;

// true for GL 2.1 and 3.x (GLSL == 120, >= 130) and GLCompute
// true for GL ES 3.1 with EXT_shader_implicit_conversions
// false for GL 2.0 and GL ES 3.0
bool support_int_to_float_implicit_conversion = true;

// It seems that only GLSL ES implicitly does not support rounding of integer division,
// while the GLSL specification does not address this issue.
// See GLSL ES Specification 1.00, issue 10.28, "Rounding of Integer Division".
// See GLSL ES Specification 3.00, issue 12.33, "Rounding of Integer Division".
bool support_integer_division_rounding = true;
};

/** Compile one statement into GLSL. */
Expand All @@ -103,6 +123,8 @@ class CodeGen_GLSL : public CodeGen_GLSLBase {
protected:
using CodeGen_GLSLBase::visit;

void visit(const Div *) override;

void visit(const Let *) override;
void visit(const For *) override;
void visit(const Select *) override;
Expand Down
Loading