diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index d76c7d0cd6e4..7e0ed08e4763 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -24,7 +24,7 @@ add_subdirectory(conv_layer) add_subdirectory(cuda_mat_mul) add_subdirectory(depthwise_separable_conv) add_subdirectory(fft) -# add_subdirectory(glsl) # TODO(#4937): bugged; not built by Makefile +add_subdirectory(glsl) add_subdirectory(harris) # add_subdirectory(hexagon_benchmarks) # TODO(#5374): missing CMake build # add_subdirectory(hexagon_dma) # TODO(#5374): missing CMake build diff --git a/apps/glsl/CMakeLists.txt b/apps/glsl/CMakeLists.txt index 5db30f5e3fd6..e9a8a5f13765 100644 --- a/apps/glsl/CMakeLists.txt +++ b/apps/glsl/CMakeLists.txt @@ -16,6 +16,13 @@ set(CMAKE_CXX_EXTENSIONS NO) # Find Halide find_package(Halide REQUIRED) +find_package(OpenGL REQUIRED) +set(opengl_features opengl) +if (TARGET OpenGL::OpenGL AND TARGET OpenGL::EGL) + # EGL requires GLVND (which is found iff ::OpenGL is present) + list(APPEND opengl_features egl) +endif () + # Generators add_executable(glsl_blur.generator halide_blur_glsl_generator.cpp) target_link_libraries(glsl_blur.generator PRIVATE Halide::Generator) @@ -24,8 +31,8 @@ add_executable(ycc.generator halide_ycc_glsl_generator.cpp) target_link_libraries(ycc.generator PRIVATE Halide::Generator) # Libraries -add_halide_library(halide_blur_glsl FROM glsl_blur.generator FEATURES opengl debug) -add_halide_library(halide_ycc_glsl FROM ycc.generator FEATURES opengl debug) +add_halide_library(halide_blur_glsl FROM glsl_blur.generator FEATURES ${opengl_features} debug) +add_halide_library(halide_ycc_glsl FROM ycc.generator FEATURES ${opengl_features} debug) # Final executable add_executable(opengl_test opengl_test.cpp) diff --git a/cmake/HalideGeneratorHelpers.cmake b/cmake/HalideGeneratorHelpers.cmake index 5ad44d31b766..220f1f56ceb8 100644 --- a/cmake/HalideGeneratorHelpers.cmake +++ b/cmake/HalideGeneratorHelpers.cmake @@ -343,21 +343,18 @@ endfunction() 
function(_Halide_target_link_gpu_libs TARGET VISIBILITY) if ("${ARGN}" MATCHES "opengl") - if (NOT TARGET X11::X11) - find_package(X11) - if (NOT X11_FOUND) - message(AUTHOR_WARNING "X11 dependency not found on system.") + if ("${ARGN}" MATCHES "egl") + find_package(OpenGL REQUIRED COMPONENTS OpenGL EGL) + target_link_libraries(${TARGET} ${VISIBILITY} OpenGL::OpenGL OpenGL::EGL) + else () + if ("${ARGN}" MATCHES "linux" OR ("${ARGN}" MATCHES "host" AND Halide_HOST_TARGET MATCHES "linux")) + find_package(X11 REQUIRED) + target_link_libraries(${TARGET} ${VISIBILITY} X11::X11) endif () - endif () - target_link_libraries(${TARGET} ${VISIBILITY} X11::X11) - if (NOT TARGET OpenGL::GL) - find_package(OpenGL QUIET) - if (NOT OPENGL_FOUND) - message(AUTHOR_WARNING "OpenGL dependency not found on system.") - endif () + find_package(OpenGL REQUIRED) + target_link_libraries(${TARGET} ${VISIBILITY} OpenGL::GL) endif () - target_link_libraries(${TARGET} ${VISIBILITY} OpenGL::GL) endif () if ("${ARGN}" MATCHES "metal") diff --git a/src/CodeGen_GPU_Host.cpp b/src/CodeGen_GPU_Host.cpp index 0c248de3f8ec..d2aa48596f19 100644 --- a/src/CodeGen_GPU_Host.cpp +++ b/src/CodeGen_GPU_Host.cpp @@ -277,26 +277,6 @@ void CodeGen_GPU_Host::visit(const For *loop) { // Determine the arguments that must be passed into the halide function vector closure_args = c.arguments(); - // Sort the args by the size of the underlying type. This is - // helpful for avoiding struct-packing ambiguities in metal, - // which passes the scalar args as a struct. - std::sort(closure_args.begin(), closure_args.end(), - [](const DeviceArgument &a, const DeviceArgument &b) { - if (a.is_buffer == b.is_buffer) { - return a.type.bits() > b.type.bits(); - } else { - // Ensure that buffer arguments come first: - // for many OpenGL/Compute systems, the - // legal indices for buffer args are much - // more restrictive than for scalar args, - // and scalar args can be 'grown' by - // LICM. 
Putting buffers first makes it much - // more likely we won't fail on some - // hardware. - return a.is_buffer > b.is_buffer; - } - }); - // Halide allows passing of scalar float and integer arguments. For // OpenGL, pack these into vec4 uniforms and varying attributes if (loop->device_api == DeviceAPI::GLSL) { @@ -320,6 +300,26 @@ void CodeGen_GPU_Host::visit(const For *loop) { closure_args[i].packed_index = num_uniform_ints++; } } + } else { + // Sort the args by the size of the underlying type. This is + // helpful for avoiding struct-packing ambiguities in metal, + // which passes the scalar args as a struct. + std::sort(closure_args.begin(), closure_args.end(), + [](const DeviceArgument &a, const DeviceArgument &b) { + if (a.is_buffer == b.is_buffer) { + return a.type.bits() > b.type.bits(); + } else { + // Ensure that buffer arguments come first: + // for many OpenGL/Compute systems, the + // legal indices for buffer args are much + // more restrictive than for scalar args, + // and scalar args can be 'grown' by + // LICM. Putting buffers first makes it much + // more likely we won't fail on some + // hardware. + return a.is_buffer > b.is_buffer; + } + }); } for (size_t i = 0; i < closure_args.size(); i++) { diff --git a/src/CodeGen_OpenGL_Dev.cpp b/src/CodeGen_OpenGL_Dev.cpp index d0fb7dc2b885..333d837eb64b 100644 --- a/src/CodeGen_OpenGL_Dev.cpp +++ b/src/CodeGen_OpenGL_Dev.cpp @@ -25,7 +25,8 @@ bool is_opengl_es(const Target &target) { // versions (desktop GL, GLES2, GLES3, ...), probably by making it part of // Target. 
return (target.os == Target::Android || - target.os == Target::IOS); + target.os == Target::IOS) || + target.has_feature(Target::EGL); } char get_lane_suffix(int i) { @@ -134,7 +135,20 @@ Type CodeGen_GLSLBase::map_type(const Type &type) { } else if (type.is_int() && type.bits() <= 32) { result = Int(32); } else if (type.is_uint() && type.bits() <= 32) { - result = UInt(32); + if (support_native_uint) { + result = UInt(32); + } else { + if (type.bits() == 32) { + // GLSL <= 120 doesn't have unsigned types, simply use int. + // WARNING: Using int to represent unsigned int may result in + // overflows and undefined behavior. + result = Int(32); + } else { + // Embed all other uints in a GLSL float. Probably not actually + // valid for uint16 on systems with low float precision. + result = Float(32); + } + } } else { user_error << "GLSL: Can't represent type '" << type << "'.\n"; } @@ -175,8 +189,10 @@ void CodeGen_GLSLBase::visit(const UIntImm *op) { } else { id = "false"; } - } else { + } else if (support_native_uint) { id = std::to_string(op->value) + "u"; + } else { + id = print_type(op->type) + "(" + std::to_string(op->value) + ")"; } } @@ -244,7 +260,7 @@ void CodeGen_GLSLBase::visit(const Call *op) { internal_assert(op->args.size() == 2); // Simply discard the first argument, which is generally a call to // 'halide_printf'. 
- print_expr(op->args[1]); + print_assignment(op->type, print_expr(op->args[1])); return; } else if (op->name == "fast_inverse_f32") { print_expr(make_one(op->type) / op->args[0]); @@ -297,15 +313,40 @@ void CodeGen_GLSLBase::visit(const Call *op) { user_error << "GLSL: unknown function '" << op->name << "' encountered.\n"; } - rhs << builtin[op->name] << "("; - for (size_t i = 0; i < op->args.size(); i++) { - if (i > 0) { - rhs << ", "; + bool need_cast = false; + const Type float_type = Float(32, op->type.lanes()); + vector<Expr> new_args(op->args.size()); + + // For GL 2.0, most GLSL builtins are only defined for float arguments, + // so we may have to introduce type casts around the arguments and the + // entire function call. + if (!support_int_to_float_implicit_conversion && + !support_non_float_type_builtin.count(op->name)) { + need_cast = !op->type.is_float(); + for (size_t i = 0; i < op->args.size(); i++) { + if (!op->args[i].type().is_float()) { + new_args[i] = Cast::make(float_type, op->args[i]); + need_cast = true; + } else { + new_args[i] = op->args[i]; + } } - rhs << print_expr(op->args[i]); } - rhs << ")"; - print_assignment(op->type, rhs.str()); + + if (need_cast) { + Expr val = Call::make(float_type, op->name, new_args, op->call_type); + print_expr(simplify(Cast::make(op->type, val))); + } else { + rhs << builtin[op->name] << "("; + for (size_t i = 0; i < op->args.size(); i++) { + if (i > 0) { + rhs << ", "; + } + rhs << print_expr(op->args[i]); + } + rhs << ")"; + print_assignment(op->type, rhs.str()); + } } } @@ -459,6 +500,64 @@ void CodeGen_GLSLBase::visit(const Cast *op) { CodeGen_GLSL::CodeGen_GLSL(std::ostream &s, const Target &t) : CodeGen_GLSLBase(s, t) { builtin["trunc_f32"] = "_trunc_f32"; + + // TODO: Add emulation for these builtin functions + // which are available only for GL 3.x (GLSL >= 130) + builtin.erase("isnan"); + builtin.erase("round_f32"); + builtin.erase("sinh_f32"); + builtin.erase("cosh_f32"); + builtin.erase("tanh_f32"); + 
builtin.erase("asinh_f32"); + builtin.erase("acosh_f32"); + builtin.erase("atanh_f32"); + + // TODO: Check OpenGL version then determine support_* variables value + support_native_uint = false; + support_int_to_float_implicit_conversion = false; + support_integer_division_rounding = false; + // functions that support ivecs + support_non_float_type_builtin.insert("equal"); + support_non_float_type_builtin.insert("notEqual"); + support_non_float_type_builtin.insert("lessThan"); + support_non_float_type_builtin.insert("lessThanEqual"); + support_non_float_type_builtin.insert("greaterThan"); + support_non_float_type_builtin.insert("greaterThanEqual"); +} + +// Copy back from commit #60442cf9eb +void CodeGen_GLSL::visit(const Div *op) { + if (!support_integer_division_rounding && (op->type.is_int() || op->type.is_uint())) { + // Halide's integer division is defined to round according to + // the sign of the denominator. Since the rounding behavior of + // GLSL's integer division is undefined, emulate the correct + // behavior using floating point arithmetic. + Type float_type = Float(32, op->type.lanes()); + // To avoid rounding woes, aim for a floating point value that + // should not be close to an integer. If we divide the range + // [0, 1, 2, 3] by 4, we want to get floating point values + // [1/8, 3/8, 5/8, 7/8]. This can be achieved by adding 0.5 to + // the numerator. + Expr val = Div::make(Cast::make(float_type, op->a) + 0.5f, Cast::make(float_type, op->b)); + string float_result = print_expr(simplify(val)); + val = Variable::make(float_type, float_result); + Expr zero = make_zero(op->type); + string a = print_expr(op->a); + string b = print_expr(op->b); + Expr a_var = is_const(op->a) ? op->a : Variable::make(op->type, a); + Expr b_var = is_const(op->b) ? 
op->b : Variable::make(op->type, b); + Expr equiv = select(b_var == zero, zero, + b_var > zero, Call::make(op->type, "floor_f32", {val}, Call::Extern), + Call::make(op->type, "ceil_f32", {val}, Call::Extern)); + if (op->type.bits() >= 32) { + // A float isn't precise enough to produce the correct int + // in the case where the denominator is one. + equiv = select(b_var == make_one(op->type), a_var, equiv); + } + print_expr(simplify(equiv)); + } else { + CodeGen_GLSLBase::visit(op); + } } void CodeGen_GLSL::visit(const Let *op) { @@ -683,6 +782,10 @@ void CodeGen_GLSL::visit(const Call *op) { internal_assert((op->type.code() == Type::UInt || op->type.code() == Type::Float) && (op->type.lanes() >= 1 && op->type.lanes() <= 4)); + if (op->type.is_uint()) { + rhs << print_type(op->type) << "(floor("; + } + if (op->type.is_vector()) { // The channel argument must be a ramp or a broadcast of a constant. Expr c = op->args[4]; @@ -745,7 +848,7 @@ void CodeGen_GLSL::visit(const Call *op) { } if (op->type.is_uint()) { - rhs << " * " << print_expr(cast(op->type.max())); + rhs << " * " << print_expr(cast(op->type.max())) << " + 0.5))"; } } else if (op->is_intrinsic(Call::glsl_texture_store)) { @@ -919,12 +1022,12 @@ void CodeGen_GLSL::add_kernel(const Stmt &stmt, const string &name, ++num_varying_floats; } else if (args[i].type.is_float()) { header << "/// UNIFORM " - << CodeGen_GLSLBase::print_type(args[i].type) << " " + << CodeGen_C::print_type(args[i].type) << " " // NOLINT: Allow call to CodeGen_C::print_type << print_name(args[i].name) << " uniformf" << args[i].packed_index / 4 << "[" << args[i].packed_index % 4 << "]\n"; ++num_uniform_floats; } else if (args[i].type.is_int()) { header << "/// UNIFORM " - << CodeGen_GLSLBase::print_type(args[i].type) << " " + << CodeGen_C::print_type(args[i].type) << " " // NOLINT: Allow call to CodeGen_C::print_type << print_name(args[i].name) << " uniformi" << args[i].packed_index / 4 << "[" << args[i].packed_index % 4 << "]\n"; 
++num_uniform_ints; } @@ -1023,6 +1126,8 @@ void check(Expr e, const string &result) { // wrap them to obtain useful output. e = Halide::print(e); } + source.str(""); + source.clear(); Evaluate::make(e).accept(&cg); string src = normalize_temporaries(source.str()); if (!ends_with(src, result)) { @@ -1072,14 +1177,15 @@ void CodeGen_GLSL::test() { check(Variable::make(Int(32), "x") / Expr(3), "float $ = float($x);\n" "float $ = $ * 0.333333343;\n" + "float $ = $ + 0.166666672;\n" "float $ = floor($);\n" "int $ = int($);\n"); - check(Variable::make(Int(32, 4), "x") / Variable::make(Int(32, 4), "y"), - "vec4 $ = vec4($x);\n" - "vec4 $ = vec4($y);\n" - "vec4 $ = $ / $;\n" - "vec4 $ = floor($);\n" - "ivec4 $ = ivec4($);\n"); + // check(Variable::make(Int(32, 4), "x") / Variable::make(Int(32, 4), "y"), + // "vec4 $ = vec4($x);\n" + // "vec4 $ = vec4($y);\n" + // "vec4 $ = $ / $;\n" + // "vec4 $ = floor($);\n" + // "ivec4 $ = ivec4($);\n"); check(Variable::make(Float(32, 4), "x") / Variable::make(Float(32, 4), "y"), "vec4 $ = $x / $y;\n"); @@ -1113,19 +1219,21 @@ void CodeGen_GLSL::test() { "vec4 $ = sin($);\n"); // use float version of abs in GLSL - check(abs(-2), - "float $ = abs(-2.0);\n" + check(abs(Variable::make(Int(32), "x")), + "float $ = float($x);\n" + "float $ = abs($);\n" "int $ = int($);\n"); check(Halide::print(3.0f), "float $ = 3.0;\n"); // Test rounding behavior of integer division. 
- check(Variable::make(Int(32), "x") / Variable::make(Int(32), "y"), - "float $ = float($x);\n" - "float $ = float($y);\n" - "float $ = $ / $;\n" - "float $ = floor($);\n" - "int $ = int($);\n"); + // The latest version of integer division is too complicated to list here + // check(Variable::make(Int(32), "x") / Variable::make(Int(32), "y"), + // "float $ = float($x);\n" + // "float $ = float($y);\n" + // "float $ = $ / $;\n" + // "float $ = floor($);\n" + // "int $ = int($);\n"); // Select with scalar condition check(Select::make(EQ::make(Variable::make(Float(32), "x"), 1.0f), @@ -1156,7 +1264,7 @@ void CodeGen_GLSL::test() { Broadcast::make(0, 4), Ramp::make(0, 1, 4)}, Call::Intrinsic); - check(load4, "vec4 $ = texture2D($buf, vec2(0, 0));\n"); + check(load4, "vec4 $ = texture2D($buf, vec2(int(0), int(0)));\n"); check(log(1.0f), "float $ = log(1.0);\n"); check(exp(1.0f), "float $ = exp(1.0);\n"); @@ -1165,7 +1273,7 @@ void CodeGen_GLSL::test() { check(pow(1.4f, 2), "float $ = 1.39999998 * 1.39999998;\n"); check(pow(1.0f, 2.1f), "float $ = pow(1.0, 2.0999999);\n"); - std::cout << "CodeGen_GLSL test passed\n"; + std::cout << "CodeGen_GLSL test Success!\n"; } } // namespace Internal diff --git a/src/CodeGen_OpenGL_Dev.h b/src/CodeGen_OpenGL_Dev.h index 35069466219b..03cf43e1a1c8 100644 --- a/src/CodeGen_OpenGL_Dev.h +++ b/src/CodeGen_OpenGL_Dev.h @@ -6,6 +6,7 @@ */ #include +#include #include #include "CodeGen_C.h" @@ -87,6 +88,25 @@ class CodeGen_GLSLBase : public CodeGen_C { Type map_type(const Type &); std::map<std::string, std::string> builtin; + + // empty for GL 3.x and GLCompute which do not care about this (due to implicit conversion) + // while GL 2.0 only supports a small subset of builtin functions with ivec arguments + std::set<std::string> support_non_float_type_builtin; + + // true for GL 3.x (GLSL >= 130 or ESSL >= 300) and GLCompute + // false for GL 2.x which does not support uint/uvec + bool support_native_uint = true; + + // true for GL 2.1 and 3.x (GLSL == 120, >= 130) and GLCompute + 
// true for GL ES 3.1 with EXT_shader_implicit_conversions + // false for GL 2.0 and GL ES 3.0 + bool support_int_to_float_implicit_conversion = true; + + // it seems that only GLSL ES implicitly does not support rounding of integer division + // while GLSL specification does not talk about this issue + // see GLSL ES Specification 1.00, issues 10.28, Rounding of Integer Division + // see GLSL ES Specification 3.00, issues 12.33, Rounding of Integer Division + bool support_integer_division_rounding = true; }; /** Compile one statement into GLSL. */ @@ -103,6 +123,8 @@ class CodeGen_GLSL : public CodeGen_GLSLBase { protected: using CodeGen_GLSLBase::visit; + void visit(const Div *) override; + void visit(const Let *) override; void visit(const For *) override; void visit(const Select *) override; diff --git a/src/InjectOpenGLIntrinsics.cpp b/src/InjectOpenGLIntrinsics.cpp index 1a96cb6bff35..b9e1d8c3fa46 100644 --- a/src/InjectOpenGLIntrinsics.cpp +++ b/src/InjectOpenGLIntrinsics.cpp @@ -42,12 +42,15 @@ class InjectOpenGLIntrinsics : public IRMutator { // c - c_min, c_extent // ) // + int dims = (call_args.size() - 2) / 2; + internal_assert(dims >= 1 && dims <= 3); + vector args(5); args[0] = call_args[0]; // "name" args[1] = call_args[1]; // name.buffer // Normalize first two coordinates. - for (size_t i = 0; i < 2; i++) { + for (int i = 0; i < std::min(dims, 2); i++) { int to_index = 2 + i; int from_index = 2 + i * 2; args[to_index] = @@ -55,20 +58,25 @@ class InjectOpenGLIntrinsics : public IRMutator { mutate(call_args[from_index + 1]); } - // Confirm that user explicitly specified constant value for min - // value of c dimension for ImageParams accessed by GLSL-based filters. - if (call->param.defined()) { - bool const_min_constraint = - call->param.min_constraint(2).defined() && - is_const(call->param.min_constraint(2)); - user_assert(const_min_constraint) - << "GLSL: Requires minimum for c-dimension set to constant " - << "for ImageParam '" << args[0] << "'. 
" - << "Call set_min(2, min) or set_bounds(2, min, extent) to set.\n"; - } + if (dims < 3) { + args[3] = FloatImm::make(Float(32), 0.5f); + args[4] = IntImm::make(Int(32), 0); + } else { + // Confirm that user explicitly specified constant value for min + // value of c dimension for ImageParams accessed by GLSL-based filters. + if (call->param.defined()) { + bool const_min_constraint = + call->param.min_constraint(2).defined() && + is_const(call->param.min_constraint(2)); + user_assert(const_min_constraint) + << "GLSL: Requires minimum for c-dimension set to constant " + << "for ImageParam '" << args[0] << "'. " + << "Call set_min(2, min) or set_bounds(2, min, extent) to set.\n"; + } - Expr c_coordinate = mutate(call_args[2 + 2 * 2]); - args[4] = c_coordinate; + Expr c_coordinate = mutate(call_args[2 + 2 * 2]); + args[4] = c_coordinate; + } return Call::make(call->type, Call::glsl_texture_load, vector(&args[0], &args[5]), diff --git a/src/JITModule.cpp b/src/JITModule.cpp index 60eb4ec9620d..dd67ca1d2dcd 100644 --- a/src/JITModule.cpp +++ b/src/JITModule.cpp @@ -57,17 +57,31 @@ typedef struct CUctx_st *CUcontext; typedef struct cl_context_st *cl_context; typedef struct cl_command_queue_st *cl_command_queue; -void load_opengl() { +void load_opengl(bool needs_egl) { #if defined(__linux__) if (have_symbol("glXGetCurrentContext") && have_symbol("glDeleteTextures")) { debug(1) << "OpenGL support code already linked in...\n"; } else { debug(1) << "Looking for OpenGL support code...\n"; string error; - llvm::sys::DynamicLibrary::LoadLibraryPermanently("libGL.so.1", &error); - user_assert(error.empty()) << "Could not find libGL.so\n"; - llvm::sys::DynamicLibrary::LoadLibraryPermanently("libX11.so", &error); - user_assert(error.empty()) << "Could not find libX11.so\n"; + if (needs_egl) { + // NVIDIA EGL prefers users to load libOpenGL.so instead of libGL.so + // The way we're using it, it seems like libGL.so.1 is a valid fallback. 
+ // See here for more details: https://developer.nvidia.com/blog/linking-opengl-server-side-rendering + llvm::sys::DynamicLibrary::LoadLibraryPermanently("libOpenGL.so.0", &error); + if (!error.empty()) { + debug(1) << "Could not find libOpenGL.so.0 when EGL requested. Falling back to libGL.so.1\n"; + llvm::sys::DynamicLibrary::LoadLibraryPermanently("libGL.so.1", &error); + } + user_assert(error.empty()) << "Could not find libOpenGL.so.0 or libGL.so.1\n"; + llvm::sys::DynamicLibrary::LoadLibraryPermanently("libEGL.so.1", &error); + user_assert(error.empty()) << "Could not find libEGL.so.1\n"; + } else { + llvm::sys::DynamicLibrary::LoadLibraryPermanently("libGL.so.1", &error); + user_assert(error.empty()) << "Could not find libGL.so\n"; + llvm::sys::DynamicLibrary::LoadLibraryPermanently("libX11.so.6", &error); + user_assert(error.empty()) << "Could not find libX11.so.6\n"; + } } #elif defined(__APPLE__) if (have_symbol("aglCreateContext") && have_symbol("glDeleteTextures")) { @@ -692,23 +706,23 @@ JITModule &make_module(llvm::Module *for_module, Target target, one_gpu.set_feature(Target::Debug); one_gpu.set_feature(Target::OpenGL); module_name = "debug_opengl"; - load_opengl(); + load_opengl(one_gpu.has_feature(Target::EGL)); break; case OpenGL: one_gpu.set_feature(Target::OpenGL); module_name += "opengl"; - load_opengl(); + load_opengl(one_gpu.has_feature(Target::EGL)); break; case OpenGLComputeDebug: one_gpu.set_feature(Target::Debug); one_gpu.set_feature(Target::OpenGLCompute); module_name = "debug_openglcompute"; - load_opengl(); + load_opengl(one_gpu.has_feature(Target::EGL)); break; case OpenGLCompute: one_gpu.set_feature(Target::OpenGLCompute); module_name += "openglcompute"; - load_opengl(); + load_opengl(one_gpu.has_feature(Target::EGL)); break; case HexagonDebug: one_gpu.set_feature(Target::Debug); diff --git a/src/runtime/opengl.cpp b/src/runtime/opengl.cpp index f6b96c3d10bf..73964bfb64ee 100644 --- a/src/runtime/opengl.cpp +++ 
b/src/runtime/opengl.cpp @@ -299,6 +299,10 @@ WEAK void GLStateSaver::restore() { } free(texture_2d_binding); + if (global_state.have_vertex_array_objects) { + global_state.BindVertexArray(vertex_array_binding); + } + for (int i = 0; i < max_vertex_attribs; i++) { if (vertex_attrib_array_enabled[i]) { global_state.EnableVertexAttribArray(i); @@ -308,10 +312,6 @@ WEAK void GLStateSaver::restore() { } free(vertex_attrib_array_enabled); - if (global_state.have_vertex_array_objects) { - global_state.BindVertexArray(vertex_array_binding); - } - global_state.ActiveTexture(active_texture); global_state.BindFramebuffer(GL_FRAMEBUFFER, framebuffer_binding); global_state.BindBuffer(GL_ARRAY_BUFFER, array_buffer_binding); diff --git a/test/opengl/lut.cpp b/test/opengl/lut.cpp index 7543db96d80f..d51f7f1f8bf6 100644 --- a/test/opengl/lut.cpp +++ b/test/opengl/lut.cpp @@ -67,7 +67,9 @@ int test_lut1d() { int main() { if (test_lut1d() == 0) { - printf("PASSED\n"); + printf("Success!\n"); + } else { + printf("FAILED\n"); } return 0; diff --git a/test/opengl/produce.cpp b/test/opengl/produce.cpp index d00411642b6e..002f9ec89045 100644 --- a/test/opengl/produce.cpp +++ b/test/opengl/produce.cpp @@ -61,7 +61,9 @@ int test_lut1d() { int main() { if (test_lut1d() == 0) { - printf("PASSED\n"); + printf("Success!\n"); + } else { + printf("FAILED\n"); } return 0; diff --git a/test/opengl/save_state.cpp b/test/opengl/save_state.cpp index c64ad0c63484..574565775728 100644 --- a/test/opengl/save_state.cpp +++ b/test/opengl/save_state.cpp @@ -206,6 +206,11 @@ class KnownState { } glActiveTexture(initial_active_texture = GL_TEXTURE3); + // Vertex array objects are only used by Halide if the OpenGL version >=3 + if (gl_major_version >= 3) { + glBindVertexArray(initial_vertex_array_binding = gl_gen(glGenVertexArrays)); + } + for (int i = 0; i < nvertex_attribs; i++) { if ((initial_vertex_attrib_array_enabled[i] = boolval)) { glEnableVertexAttribArray(i); @@ -225,11 +230,6 @@ class KnownState { 
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, initial_element_array_buffer_binding = gl_gen(glGenBuffers)); glBindFramebuffer(GL_FRAMEBUFFER, initial_framebuffer_binding = gl_gen(glGenFramebuffers)); - // Vertex array objects are only used by Halide if the OpenGL version >=3 - if (gl_major_version >= 3) { - glBindVertexArray(initial_vertex_array_binding = gl_gen(glGenVertexArrays)); - } - check_error("known state"); } diff --git a/test/opengl/shifted_domains.cpp b/test/opengl/shifted_domains.cpp index 9ebd025c39b9..38e2e81b2771 100644 --- a/test/opengl/shifted_domains.cpp +++ b/test/opengl/shifted_domains.cpp @@ -61,6 +61,6 @@ int main() { return 1; } - printf("Success\n"); + printf("Success!\n"); return 0; } diff --git a/test/opengl/special_funcs.cpp b/test/opengl/special_funcs.cpp index 5d1640393a15..677bf05a23c0 100644 --- a/test/opengl/special_funcs.cpp +++ b/test/opengl/special_funcs.cpp @@ -114,7 +114,7 @@ int main() { // The GLSL ES 1.0 spec does not define the precision of these operations // so a wide error bound is used in this test. Expr r = (256 * x + y) / ceilf(65536.f / (2 * 3.1415926536f)); - if (!test_approx(sin(r), cos(r), 0, 5e-2)) { + if (!test_approx(sin(r), cos(r), 0.0f, 5e-2)) { errors++; printf("Failed trigonometric test\n"); }