Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
6f72225
Fix disorder kernel args for OpenGL
xndcn Dec 10, 2020
b072097
Add support of less than 3 dimensions `image_load` for OpenGL
xndcn Dec 10, 2020
8c2e82a
Fix mismatch type of uniforms for OpenGL kernel args
xndcn Dec 10, 2020
b5cd6c8
Add some variables to distinguish GL 2.x from GLCompute
xndcn Dec 10, 2020
e5bee4f
Fix issue of precision loss for OpenGL uint texture loading
xndcn Dec 10, 2020
2b26b6f
Fix issue of VAO for OpenGL 3.x
xndcn Dec 10, 2020
957b6bd
Fix remain errors in OpenGL tests
xndcn Dec 10, 2020
7f32cd4
Fix error `size_t i` in InjectOpenGLIntrinsics.cpp
xndcn Dec 12, 2020
f79a1fc
Add back apps/glsl since OpenGL backend is fixed
xndcn Dec 15, 2020
ba6af38
Add some more comments in OpenGL backend
xndcn Dec 16, 2020
72ce194
Add preferred EGL feature for apps/glsl
xndcn Dec 16, 2020
52133a2
Explicitly list EGL as an optional component
alexreinking Dec 16, 2020
6f0dc47
Refine CMake files around `EGL` feature as reviews
xndcn Dec 17, 2020
c71739a
Merge branch 'master' into pr/5545
steven-johnson Jan 5, 2021
8107da1
Change to only link X11 library under Linux
xndcn Jan 7, 2021
3967dca
Merge branch 'fix-opengl' of https://github.com/xndcn/Halide into pr/…
steven-johnson Jan 7, 2021
f1d4a13
fix CMake handling for OpenGL
alexreinking Jan 7, 2021
6043914
load libEGL in JITModule
alexreinking Jan 8, 2021
e76ffad
prefer libOpenGL to libGL and avoid libX11 when using EGL
alexreinking Jan 8, 2021
8e75350
adding soversions to opengl library loads
alexreinking Jan 8, 2021
629a4af
adding soversions to X11 library loads
alexreinking Jan 8, 2021
693e2ab
Merge branch 'master' into pr/5545
steven-johnson Jan 8, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ add_subdirectory(conv_layer)
add_subdirectory(cuda_mat_mul)
add_subdirectory(depthwise_separable_conv)
add_subdirectory(fft)
# add_subdirectory(glsl) # TODO(#4937): bugged; not built by Makefile
add_subdirectory(glsl)
add_subdirectory(harris)
# add_subdirectory(hexagon_benchmarks) # TODO(#5374): missing CMake build
# add_subdirectory(hexagon_dma) # TODO(#5374): missing CMake build
Expand Down
11 changes: 9 additions & 2 deletions apps/glsl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ set(CMAKE_CXX_EXTENSIONS NO)
# Find Halide
find_package(Halide REQUIRED)

find_package(OpenGL REQUIRED)
set(opengl_features opengl)
if (TARGET OpenGL::OpenGL AND TARGET OpenGL::EGL)
# EGL requires GLVND (which is found iff ::OpenGL is present)
list(APPEND opengl_features egl)
endif ()

# Generators
add_executable(glsl_blur.generator halide_blur_glsl_generator.cpp)
target_link_libraries(glsl_blur.generator PRIVATE Halide::Generator)
Expand All @@ -24,8 +31,8 @@ add_executable(ycc.generator halide_ycc_glsl_generator.cpp)
target_link_libraries(ycc.generator PRIVATE Halide::Generator)

# Libraries
add_halide_library(halide_blur_glsl FROM glsl_blur.generator FEATURES opengl debug)
add_halide_library(halide_ycc_glsl FROM ycc.generator FEATURES opengl debug)
add_halide_library(halide_blur_glsl FROM glsl_blur.generator FEATURES ${opengl_features} debug)
add_halide_library(halide_ycc_glsl FROM ycc.generator FEATURES ${opengl_features} debug)

# Final executable
add_executable(opengl_test opengl_test.cpp)
Expand Down
21 changes: 9 additions & 12 deletions cmake/HalideGeneratorHelpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -343,21 +343,18 @@ endfunction()

function(_Halide_target_link_gpu_libs TARGET VISIBILITY)
if ("${ARGN}" MATCHES "opengl")
if (NOT TARGET X11::X11)
find_package(X11)
if (NOT X11_FOUND)
message(AUTHOR_WARNING "X11 dependency not found on system.")
if ("${ARGN}" MATCHES "egl")
find_package(OpenGL REQUIRED COMPONENTS OpenGL EGL)
target_link_libraries(${TARGET} ${VISIBILITY} OpenGL::OpenGL OpenGL::EGL)
else ()
if ("${ARGN}" MATCHES "linux" OR ("${ARGN}" MATCHES "host" AND Halide_HOST_TARGET MATCHES "linux"))
find_package(X11 REQUIRED)
target_link_libraries(${TARGET} ${VISIBILITY} X11::X11)
endif ()
endif ()
target_link_libraries(${TARGET} ${VISIBILITY} X11::X11)

if (NOT TARGET OpenGL::GL)
find_package(OpenGL QUIET)
if (NOT OPENGL_FOUND)
message(AUTHOR_WARNING "OpenGL dependency not found on system.")
endif ()
find_package(OpenGL REQUIRED)
target_link_libraries(${TARGET} ${VISIBILITY} OpenGL::GL)
endif ()
target_link_libraries(${TARGET} ${VISIBILITY} OpenGL::GL)
endif ()

if ("${ARGN}" MATCHES "metal")
Expand Down
40 changes: 20 additions & 20 deletions src/CodeGen_GPU_Host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,26 +277,6 @@ void CodeGen_GPU_Host<CodeGen_CPU>::visit(const For *loop) {
// Determine the arguments that must be passed into the halide function
vector<DeviceArgument> closure_args = c.arguments();

// Sort the args by the size of the underlying type. This is
// helpful for avoiding struct-packing ambiguities in metal,
// which passes the scalar args as a struct.
std::sort(closure_args.begin(), closure_args.end(),
[](const DeviceArgument &a, const DeviceArgument &b) {
if (a.is_buffer == b.is_buffer) {
return a.type.bits() > b.type.bits();
} else {
// Ensure that buffer arguments come first:
// for many OpenGL/Compute systems, the
// legal indices for buffer args are much
// more restrictive than for scalar args,
// and scalar args can be 'grown' by
// LICM. Putting buffers first makes it much
// more likely we won't fail on some
// hardware.
return a.is_buffer > b.is_buffer;
}
});

// Halide allows passing of scalar float and integer arguments. For
// OpenGL, pack these into vec4 uniforms and varying attributes
if (loop->device_api == DeviceAPI::GLSL) {
Expand All @@ -320,6 +300,26 @@ void CodeGen_GPU_Host<CodeGen_CPU>::visit(const For *loop) {
closure_args[i].packed_index = num_uniform_ints++;
}
}
} else {
// Sort the args by the size of the underlying type. This is
// helpful for avoiding struct-packing ambiguities in metal,
// which passes the scalar args as a struct.
std::sort(closure_args.begin(), closure_args.end(),
[](const DeviceArgument &a, const DeviceArgument &b) {
if (a.is_buffer == b.is_buffer) {
return a.type.bits() > b.type.bits();
} else {
// Ensure that buffer arguments come first:
// for many OpenGL/Compute systems, the
// legal indices for buffer args are much
// more restrictive than for scalar args,
// and scalar args can be 'grown' by
// LICM. Putting buffers first makes it much
// more likely we won't fail on some
// hardware.
return a.is_buffer > b.is_buffer;
}
});
}

for (size_t i = 0; i < closure_args.size(); i++) {
Expand Down
168 changes: 138 additions & 30 deletions src/CodeGen_OpenGL_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ bool is_opengl_es(const Target &target) {
// versions (desktop GL, GLES2, GLES3, ...), probably by making it part of
// Target.
return (target.os == Target::Android ||
target.os == Target::IOS);
target.os == Target::IOS) ||
target.has_feature(Target::EGL);
}

char get_lane_suffix(int i) {
Expand Down Expand Up @@ -134,7 +135,20 @@ Type CodeGen_GLSLBase::map_type(const Type &type) {
} else if (type.is_int() && type.bits() <= 32) {
result = Int(32);
} else if (type.is_uint() && type.bits() <= 32) {
result = UInt(32);
if (support_native_uint) {
result = UInt(32);
} else {
if (type.bits() == 32) {
// GLSL <= 120 doesn't have unsigned types, simply use int.
// WARNING: Using int to represent unsigned int may result in
// overflows and undefined behavior.
result = Int(32);
} else {
// Embed all other uints in a GLSL float. Probably not actually
// valid for uint16 on systems with low float precision.
result = Float(32);
}
}
} else {
user_error << "GLSL: Can't represent type '" << type << "'.\n";
}
Expand Down Expand Up @@ -175,8 +189,10 @@ void CodeGen_GLSLBase::visit(const UIntImm *op) {
} else {
id = "false";
}
} else {
} else if (support_native_uint) {
id = std::to_string(op->value) + "u";
} else {
id = print_type(op->type) + "(" + std::to_string(op->value) + ")";
}
}

Expand Down Expand Up @@ -244,7 +260,7 @@ void CodeGen_GLSLBase::visit(const Call *op) {
internal_assert(op->args.size() == 2);
// Simply discard the first argument, which is generally a call to
// 'halide_printf'.
print_expr(op->args[1]);
print_assignment(op->type, print_expr(op->args[1]));
return;
} else if (op->name == "fast_inverse_f32") {
print_expr(make_one(op->type) / op->args[0]);
Expand Down Expand Up @@ -297,15 +313,40 @@ void CodeGen_GLSLBase::visit(const Call *op) {
user_error << "GLSL: unknown function '" << op->name << "' encountered.\n";
}

rhs << builtin[op->name] << "(";
for (size_t i = 0; i < op->args.size(); i++) {
if (i > 0) {
rhs << ", ";
bool need_cast = false;
const Type float_type = Float(32, op->type.lanes());
vector<Expr> new_args(op->args.size());

// For GL 2.0, most GLSL builtins are only defined for float arguments,
// so we may have to introduce type casts around the arguments and the
// entire function call.
if (!support_int_to_float_implicit_conversion &&
!support_non_float_type_builtin.count(op->name)) {
need_cast = !op->type.is_float();
for (size_t i = 0; i < op->args.size(); i++) {
if (!op->args[i].type().is_float()) {
new_args[i] = Cast::make(float_type, op->args[i]);
need_cast = true;
} else {
new_args[i] = op->args[i];
}
}
rhs << print_expr(op->args[i]);
}
rhs << ")";
print_assignment(op->type, rhs.str());

if (need_cast) {
Expr val = Call::make(float_type, op->name, new_args, op->call_type);
print_expr(simplify(Cast::make(op->type, val)));
} else {
rhs << builtin[op->name] << "(";
for (size_t i = 0; i < op->args.size(); i++) {
if (i > 0) {
rhs << ", ";
}
rhs << print_expr(op->args[i]);
}
rhs << ")";
print_assignment(op->type, rhs.str());
}
}
}

Expand Down Expand Up @@ -459,6 +500,64 @@ void CodeGen_GLSLBase::visit(const Cast *op) {
// Sets up the GLSL code generator: adjusts the inherited builtin table and
// fixes the capability flags that the rest of the generator consults.
CodeGen_GLSL::CodeGen_GLSL(std::ostream &s, const Target &t)
    : CodeGen_GLSLBase(s, t) {
    builtin["trunc_f32"] = "_trunc_f32";

    // TODO: Add emulation for these builtin functions
    // which are available only for GL 3.x (GLSL >= 130)
    for (const char *gl3_only : {"isnan",
                                 "round_f32",
                                 "sinh_f32",
                                 "cosh_f32",
                                 "tanh_f32",
                                 "asinh_f32",
                                 "acosh_f32",
                                 "atanh_f32"}) {
        builtin.erase(gl3_only);
    }

    // TODO: Check OpenGL version then determine support_* variables value
    support_native_uint = false;
    support_int_to_float_implicit_conversion = false;
    support_integer_division_rounding = false;

    // Functions that support ivecs, registered as accepting non-float
    // arguments.
    for (const char *ivec_capable : {"equal",
                                     "notEqual",
                                     "lessThan",
                                     "lessThanEqual",
                                     "greaterThan",
                                     "greaterThanEqual"}) {
        support_non_float_type_builtin.insert(ivec_capable);
    }
}

// Copy back from commit #60442cf9eb
void CodeGen_GLSL::visit(const Div *op) {
if (!support_integer_division_rounding && (op->type.is_int() || op->type.is_uint())) {
// Halide's integer division is defined to round according to
// the sign of the denominator. Since the rounding behavior of
// GLSL's integer division is undefined, emulate the correct
// behavior using floating point arithmetic.
Type float_type = Float(32, op->type.lanes());
// To avoid rounding woes, aim for a floating point value that
// should not be close to an integer. If we divide the range
// [0, 1, 2, 3] by 4, we want to get floating point values
// [1/8, 3/8, 5/8, 7/8]. This can be achieved by adding 0.5 to
// the numerator.
Expr val = Div::make(Cast::make(float_type, op->a) + 0.5f, Cast::make(float_type, op->b));
string float_result = print_expr(simplify(val));
val = Variable::make(float_type, float_result);
Expr zero = make_zero(op->type);
string a = print_expr(op->a);
string b = print_expr(op->b);
Expr a_var = is_const(op->a) ? op->a : Variable::make(op->type, a);
Expr b_var = is_const(op->b) ? op->b : Variable::make(op->type, b);
Expr equiv = select(b_var == zero, zero,
b_var > zero, Call::make(op->type, "floor_f32", {val}, Call::Extern),
Call::make(op->type, "ceil_f32", {val}, Call::Extern));
if (op->type.bits() >= 32) {
// A float isn't precise enough to produce the correct int
// in the case where the denominator is one.
equiv = select(b_var == make_one(op->type), a_var, equiv);
}
print_expr(simplify(equiv));
} else {
CodeGen_GLSLBase::visit(op);
}
}

void CodeGen_GLSL::visit(const Let *op) {
Expand Down Expand Up @@ -683,6 +782,10 @@ void CodeGen_GLSL::visit(const Call *op) {
internal_assert((op->type.code() == Type::UInt || op->type.code() == Type::Float) &&
(op->type.lanes() >= 1 && op->type.lanes() <= 4));

if (op->type.is_uint()) {
rhs << print_type(op->type) << "(floor(";
}

if (op->type.is_vector()) {
// The channel argument must be a ramp or a broadcast of a constant.
Expr c = op->args[4];
Expand Down Expand Up @@ -745,7 +848,7 @@ void CodeGen_GLSL::visit(const Call *op) {
}

if (op->type.is_uint()) {
rhs << " * " << print_expr(cast<float>(op->type.max()));
rhs << " * " << print_expr(cast<float>(op->type.max())) << " + 0.5))";
}

} else if (op->is_intrinsic(Call::glsl_texture_store)) {
Expand Down Expand Up @@ -919,12 +1022,12 @@ void CodeGen_GLSL::add_kernel(const Stmt &stmt, const string &name,
++num_varying_floats;
} else if (args[i].type.is_float()) {
header << "/// UNIFORM "
<< CodeGen_GLSLBase::print_type(args[i].type) << " "
<< CodeGen_C::print_type(args[i].type) << " " // NOLINT: Allow call to CodeGen_C::print_type
<< print_name(args[i].name) << " uniformf" << args[i].packed_index / 4 << "[" << args[i].packed_index % 4 << "]\n";
++num_uniform_floats;
} else if (args[i].type.is_int()) {
header << "/// UNIFORM "
<< CodeGen_GLSLBase::print_type(args[i].type) << " "
<< CodeGen_C::print_type(args[i].type) << " " // NOLINT: Allow call to CodeGen_C::print_type
<< print_name(args[i].name) << " uniformi" << args[i].packed_index / 4 << "[" << args[i].packed_index % 4 << "]\n";
++num_uniform_ints;
}
Expand Down Expand Up @@ -1023,6 +1126,8 @@ void check(Expr e, const string &result) {
// wrap them to obtain useful output.
e = Halide::print(e);
}
source.str("");
source.clear();
Evaluate::make(e).accept(&cg);
string src = normalize_temporaries(source.str());
if (!ends_with(src, result)) {
Expand Down Expand Up @@ -1072,14 +1177,15 @@ void CodeGen_GLSL::test() {
check(Variable::make(Int(32), "x") / Expr(3),
"float $ = float($x);\n"
"float $ = $ * 0.333333343;\n"
"float $ = $ + 0.166666672;\n"
"float $ = floor($);\n"
"int $ = int($);\n");
check(Variable::make(Int(32, 4), "x") / Variable::make(Int(32, 4), "y"),
"vec4 $ = vec4($x);\n"
"vec4 $ = vec4($y);\n"
"vec4 $ = $ / $;\n"
"vec4 $ = floor($);\n"
"ivec4 $ = ivec4($);\n");
// check(Variable::make(Int(32, 4), "x") / Variable::make(Int(32, 4), "y"),
// "vec4 $ = vec4($x);\n"
// "vec4 $ = vec4($y);\n"
// "vec4 $ = $ / $;\n"
// "vec4 $ = floor($);\n"
// "ivec4 $ = ivec4($);\n");
check(Variable::make(Float(32, 4), "x") / Variable::make(Float(32, 4), "y"),
"vec4 $ = $x / $y;\n");

Expand Down Expand Up @@ -1113,19 +1219,21 @@ void CodeGen_GLSL::test() {
"vec4 $ = sin($);\n");

// use float version of abs in GLSL
check(abs(-2),
"float $ = abs(-2.0);\n"
check(abs(Variable::make(Int(32), "x")),
"float $ = float($x);\n"
"float $ = abs($);\n"
"int $ = int($);\n");

check(Halide::print(3.0f), "float $ = 3.0;\n");

// Test rounding behavior of integer division.
check(Variable::make(Int(32), "x") / Variable::make(Int(32), "y"),
"float $ = float($x);\n"
"float $ = float($y);\n"
"float $ = $ / $;\n"
"float $ = floor($);\n"
"int $ = int($);\n");
// The latest version of integer division is too complicated to list here
// check(Variable::make(Int(32), "x") / Variable::make(Int(32), "y"),
// "float $ = float($x);\n"
// "float $ = float($y);\n"
// "float $ = $ / $;\n"
// "float $ = floor($);\n"
// "int $ = int($);\n");

// Select with scalar condition
check(Select::make(EQ::make(Variable::make(Float(32), "x"), 1.0f),
Expand Down Expand Up @@ -1156,7 +1264,7 @@ void CodeGen_GLSL::test() {
Broadcast::make(0, 4),
Ramp::make(0, 1, 4)},
Call::Intrinsic);
check(load4, "vec4 $ = texture2D($buf, vec2(0, 0));\n");
check(load4, "vec4 $ = texture2D($buf, vec2(int(0), int(0)));\n");

check(log(1.0f), "float $ = log(1.0);\n");
check(exp(1.0f), "float $ = exp(1.0);\n");
Expand All @@ -1165,7 +1273,7 @@ void CodeGen_GLSL::test() {
check(pow(1.4f, 2), "float $ = 1.39999998 * 1.39999998;\n");
check(pow(1.0f, 2.1f), "float $ = pow(1.0, 2.0999999);\n");

std::cout << "CodeGen_GLSL test passed\n";
std::cout << "CodeGen_GLSL test Success!\n";
}

} // namespace Internal
Expand Down
Loading