diff --git a/Makefile b/Makefile index 921f8e388e3b..4b2ca0e8d54b 100644 --- a/Makefile +++ b/Makefile @@ -424,7 +424,6 @@ SOURCE_FILES = \ CodeGen_Metal_Dev.cpp \ CodeGen_MIPS.cpp \ CodeGen_OpenCL_Dev.cpp \ - CodeGen_OpenGL_Dev.cpp \ CodeGen_OpenGLCompute_Dev.cpp \ CodeGen_Posix.cpp \ CodeGen_PowerPC.cpp \ @@ -595,7 +594,6 @@ HEADER_FILES = \ CodeGen_Metal_Dev.h \ CodeGen_MIPS.h \ CodeGen_OpenCL_Dev.h \ - CodeGen_OpenGL_Dev.h \ CodeGen_OpenGLCompute_Dev.h \ CodeGen_Posix.h \ CodeGen_PowerPC.h \ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0b45adf43715..8a826a02e835 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -33,7 +33,6 @@ set(HEADER_FILES CodeGen_Metal_Dev.h CodeGen_MIPS.h CodeGen_OpenCL_Dev.h - CodeGen_OpenGL_Dev.h CodeGen_OpenGLCompute_Dev.h CodeGen_Posix.h CodeGen_PowerPC.h @@ -197,7 +196,6 @@ set(SOURCE_FILES CodeGen_Metal_Dev.cpp CodeGen_MIPS.cpp CodeGen_OpenCL_Dev.cpp - CodeGen_OpenGL_Dev.cpp CodeGen_OpenGLCompute_Dev.cpp CodeGen_Posix.cpp CodeGen_PowerPC.cpp diff --git a/src/CodeGen_GPU_Host.cpp b/src/CodeGen_GPU_Host.cpp index 7f3d3a46fb68..a151a4f2ca14 100644 --- a/src/CodeGen_GPU_Host.cpp +++ b/src/CodeGen_GPU_Host.cpp @@ -8,7 +8,6 @@ #include "CodeGen_Metal_Dev.h" #include "CodeGen_OpenCL_Dev.h" #include "CodeGen_OpenGLCompute_Dev.h" -#include "CodeGen_OpenGL_Dev.h" #include "CodeGen_PTX_Dev.h" #include "CodeGen_PowerPC.h" #include "CodeGen_RISCV.h" diff --git a/src/CodeGen_OpenGLCompute_Dev.cpp b/src/CodeGen_OpenGLCompute_Dev.cpp index 1be9045bb1d1..be2c3afd2cc2 100644 --- a/src/CodeGen_OpenGLCompute_Dev.cpp +++ b/src/CodeGen_OpenGLCompute_Dev.cpp @@ -1,6 +1,7 @@ #include "CodeGen_OpenGLCompute_Dev.h" +#include "CSE.h" +#include "CodeGen_C.h" #include "CodeGen_GPU_Dev.h" -#include "CodeGen_OpenGL_Dev.h" #include "Debug.h" #include "Deinterleave.h" #include "IRMatch.h" @@ -20,6 +21,484 @@ using std::vector; namespace { +char get_lane_suffix(int i) { + internal_assert(i >= 0 && i < 4); + return "rgba"[i]; +} + +/** + * This class handles GLSL arithmetic. + * TODO: combine this with CodeGen_OpenGLCompute_C, which is now the only subclass + * (unless it ends up being useful for Vulkan in the future?) + */ +class CodeGen_GLSLBase : public CodeGen_C { +public: + CodeGen_GLSLBase(std::ostream &s, Target t); + + std::string print_name(const std::string &name) override; + std::string print_type(Type type, AppendSpaceIfNeeded space_option = DoNotAppendSpace) override; + +protected: + using CodeGen_C::visit; + + void visit(const Cast *) override; + + void visit(const FloatImm *) override; + void visit(const UIntImm *) override; + void visit(const IntImm *) override; + + void visit(const Max *op) override; + void visit(const Min *op) override; + void visit(const Call *op) override; + + void visit(const Mod *) override; + + // these have specific functions + // in GLSL that operate on vectors + void visit(const EQ *) override; + void visit(const NE *) override; + void visit(const LT *) override; + void visit(const LE *) override; + void visit(const GT *) override; + void visit(const GE *) override; + + void visit(const Shuffle *) override; + + Type map_type(const Type &); + + std::map builtin; + + // empty for GL 3.x and GLCompute which do not care about this (due to implicit conversion) + // while GL 2.0 only support a small subset of builtin functions with ivec arguments + std::set support_non_float_type_builtin; + + // true for GL 3.x (GLSL >= 130 or ESSL >= 300) and GLCompute + // false for GL 2.x which does not support uint/uvec + bool support_native_uint = true; + + // true for GL 2.1 and 3.x (GLSL == 120, >= 130) and GLCompute + // true for GL ES 3.1 with EXT_shader_implicit_conversions + // false for GL 2.0 and GL ES 3.0 + bool support_int_to_float_implicit_conversion = true; + + // it seems that only GLSL ES implicitly does not support rounding of integer division + // while GLSL specification does not talk about this issue + // see GLSL ES Specification 1.00, issues 10.28, Rounding of Integer Division + // see GLSL ES Specification 3.00, issues 12.33, Rounding of Integer Division + bool support_integer_division_rounding = true; +}; + +CodeGen_GLSLBase::CodeGen_GLSLBase(std::ostream &s, Target target) + : CodeGen_C(s, target) { + builtin["sin_f32"] = "sin"; + builtin["sqrt_f32"] = "sqrt"; + builtin["cos_f32"] = "cos"; + builtin["exp_f32"] = "exp"; + builtin["log_f32"] = "log"; + builtin["abs_f32"] = "abs"; + builtin["floor_f32"] = "floor"; + builtin["ceil_f32"] = "ceil"; + builtin["asin_f32"] = "asin"; + builtin["acos_f32"] = "acos"; + builtin["tan_f32"] = "tan"; + builtin["atan_f32"] = "atan"; + builtin["atan2_f32"] = "atan"; // also called atan in GLSL + builtin["sinh_f32"] = "sinh"; + builtin["cosh_f32"] = "cosh"; + builtin["tanh_f32"] = "tanh"; + builtin["asinh_f32"] = "asinh"; + builtin["acosh_f32"] = "acosh"; + builtin["atanh_f32"] = "atanh"; + builtin["min"] = "min"; + builtin["max"] = "max"; + builtin["mix"] = "mix"; + builtin["mod"] = "mod"; + builtin["abs"] = "abs"; + builtin["isnan"] = "isnan"; + builtin["round_f32"] = "roundEven"; + builtin["fast_inverse_sqrt_f32"] = "inversesqrt"; + + // functions that produce bvecs + builtin["equal"] = "equal"; + builtin["notEqual"] = "notEqual"; + builtin["lessThan"] = "lessThan"; + builtin["lessThanEqual"] = "lessThanEqual"; + builtin["greaterThan"] = "greaterThan"; + builtin["greaterThanEqual"] = "greaterThanEqual"; +} + +// Maps Halide types to appropriate GLSL types or emit error if no equivalent +// type is available. +Type CodeGen_GLSLBase::map_type(const Type &type) { + Type result = type; + if (type.is_scalar()) { + if (type.is_float()) { + user_assert(type.bits() <= 32) + << "GLSL: Can't represent a float with " << type.bits() << " bits.\n"; + result = Float(32); + } else if (type.is_bool()) { + // unchanged + } else if (type.is_int() && type.bits() <= 32) { + result = Int(32); + } else if (type.is_uint() && type.bits() <= 32) { + if (support_native_uint) { + result = UInt(32); + } else { + if (type.bits() == 32) { + // GLSL <= 120 doesn't have unsigned types, simply use int. + // WARNING: Using int to represent unsigned int may result in + // overflows and undefined behavior. + result = Int(32); + } else { + // Embed all other uints in a GLSL float. Probably not actually + // valid for uint16 on systems with low float precision. + result = Float(32); + } + } + } else { + user_error << "GLSL: Can't represent type '" << type << "'.\n"; + } + } else { + user_assert(type.lanes() <= 4) + << "GLSL: vector types wider than 4 aren't supported\n"; + user_assert(type.is_bool() || type.is_int() || type.is_uint() || type.is_float()) + << "GLSL: Can't represent vector type '" << type << "'.\n"; + Type scalar_type = type.element_of(); + result = map_type(scalar_type).with_lanes(type.lanes()); + } + return result; +} + +void CodeGen_GLSLBase::visit(const FloatImm *op) { + ostringstream oss; + // Print integral numbers with trailing ".0". For fractional numbers use a + // precision of 9 digits, which should be enough to recover the binary + // float unambiguously from the decimal representation (if iostreams + // implements correct rounding). + const float truncated = (op->value < 0 ? std::ceil(op->value) : std::floor(op->value)); + if (truncated == op->value) { + oss << std::fixed << std::setprecision(1) << op->value; + } else { + oss << std::setprecision(9) << op->value; + } + id = oss.str(); +} + +void CodeGen_GLSLBase::visit(const IntImm *op) { + id = print_type(op->type) + "(" + std::to_string(op->value) + ")"; +} + +void CodeGen_GLSLBase::visit(const UIntImm *op) { + if (op->type == Bool()) { + if (op->value == 1) { + id = "true"; + } else { + id = "false"; + } + } else if (support_native_uint) { + id = std::to_string(op->value) + "u"; + } else { + id = print_type(op->type) + "(" + std::to_string(op->value) + ")"; + } +} + +void CodeGen_GLSLBase::visit(const Max *op) { + print_expr(Call::make(op->type, "max", {op->a, op->b}, Call::PureExtern)); +} + +void CodeGen_GLSLBase::visit(const Min *op) { + print_expr(Call::make(op->type, "min", {op->a, op->b}, Call::PureExtern)); +} + +void CodeGen_GLSLBase::visit(const Mod *op) { + if (op->type.is_int() || op->type.is_uint()) { + // Just exploit the Euclidean identity + // FIXME: Why doesn't lower_euclidean_mod work for glsl? + // https://github.com/halide/Halide/issues/4979 + Expr zero = make_zero(op->type); + Expr equiv = select(op->a == zero, zero, + op->a - (op->a / op->b) * op->b); + equiv = common_subexpression_elimination(equiv); + print_expr(equiv); + } else { + print_expr(Call::make(op->type, "mod", {op->a, op->b}, Call::Extern)); + } +} + +void CodeGen_GLSLBase::visit(const Call *op) { + if (op->is_intrinsic(Call::lerp)) { + // Implement lerp using GLSL's mix() function, which always uses + // floating point arithmetic. + Expr zero_val = op->args[0]; + Expr one_val = op->args[1]; + Expr weight = op->args[2]; + + internal_assert(weight.type().is_uint() || weight.type().is_float()); + if (weight.type().is_uint()) { + // Normalize integer weights to [0.0f, 1.0f] range. + internal_assert(weight.type().bits() < 32); + weight = Div::make(Cast::make(Float(32), weight), + Cast::make(Float(32), weight.type().max())); + } else if (op->type.is_uint()) { + // Round float weights down to next multiple of (1/op->type.imax()) + // to give same results as lerp based on integer arithmetic. + internal_assert(op->type.bits() < 32); + weight = floor(weight * op->type.max()) / op->type.max(); + } + + Type result_type = Float(32, op->type.lanes()); + Expr e = Call::make(result_type, "mix", {zero_val, one_val, weight}, Call::Extern); + + if (!op->type.is_float()) { + // Mirror rounding implementation of Halide's integer lerp. + e = Cast::make(op->type, floor(e + 0.5f)); + } + print_expr(e); + return; + } else if (op->is_intrinsic(Call::absd)) { + internal_assert(op->args.size() == 2); + Expr a = op->args[0]; + Expr b = op->args[1]; + Expr e = cast(op->type, select(a < b, b - a, a - b)); + print_expr(e); + return; + } else if (op->is_intrinsic(Call::return_second)) { + internal_assert(op->args.size() == 2); + // Simply discard the first argument, which is generally a call to + // 'halide_printf'. + print_assignment(op->type, print_expr(op->args[1])); + return; + } else if (op->name == "fast_inverse_f32") { + print_expr(make_one(op->type) / op->args[0]); + return; + } else if (op->name == "fast_inverse_sqrt_f32") { + print_expr(make_one(op->type) / sqrt(op->args[0])); + return; + } else if (op->name == "pow_f32") { + if (can_prove(op->args[0] > 0)) { + ostringstream rhs; + rhs << "pow(" << print_expr(op->args[0]) << ", " << print_expr(op->args[1]) << ")"; + print_assignment(op->type, rhs.str()); + return; + } else { + ostringstream base; + string a = print_expr(op->args[0]); + string b = print_expr(op->args[1]); + base << "pow(abs(" << a << "), " << b << ")"; + string c = print_assignment(op->type, base.str()); + Expr a_var = is_const(op->args[0]) ? op->args[0] : Variable::make(op->type, a); + Expr b_var = is_const(op->args[1]) ? op->args[1] : Variable::make(op->type, b); + Expr c_var = Variable::make(op->type, c); + // OpenGL isn't required to produce NaNs, so we return + // zero in the undefined case. + Expr equiv = select(a_var > 0 || b_var % 2 == 0, c_var, + b_var % 2 == 1, -c_var, + 0.0f); + print_expr(simplify(equiv)); + return; + } + } else if (op->is_intrinsic(Call::shift_right)) { + print_assignment(op->type, print_expr(op->args[0]) + " >> " + print_expr(op->args[1])); + } else if (op->is_intrinsic(Call::shift_left)) { + print_assignment(op->type, print_expr(op->args[0]) + " << " + print_expr(op->args[1])); + } else if (op->is_intrinsic(Call::bitwise_not)) { + print_assignment(op->type, "~" + print_expr(op->args[0])); + } else if (op->is_intrinsic(Call::bitwise_and)) { + print_assignment(op->type, print_expr(op->args[0]) + " & " + print_expr(op->args[1])); + } else if (op->is_intrinsic(Call::bitwise_or)) { + print_assignment(op->type, print_expr(op->args[0]) + " | " + print_expr(op->args[1])); + } else if (op->is_intrinsic(Call::bitwise_xor)) { + print_assignment(op->type, print_expr(op->args[0]) + " ^ " + print_expr(op->args[1])); + } else if (op->is_intrinsic(Call::div_round_to_zero)) { + print_assignment(op->type, print_expr(op->args[0]) + " / " + print_expr(op->args[1])); + } else if (op->is_intrinsic(Call::mod_round_to_zero)) { + print_assignment(op->type, print_expr(op->args[0]) + " % " + print_expr(op->args[1])); + } else { + ostringstream rhs; + if (builtin.count(op->name) == 0) { + user_error << "GLSL: unknown function '" << op->name << "' encountered.\n"; + } + + bool need_cast = false; + const Type float_type = Float(32, op->type.lanes()); + vector new_args(op->args.size()); + + // For GL 2.0, Most GLSL builtins are only defined for float arguments, + // so we may have to introduce type casts around the arguments and the + // entire function call. + if (!support_int_to_float_implicit_conversion && + !support_non_float_type_builtin.count(op->name)) { + need_cast = !op->type.is_float(); + for (size_t i = 0; i < op->args.size(); i++) { + if (!op->args[i].type().is_float()) { + new_args[i] = Cast::make(float_type, op->args[i]); + need_cast = true; + } else { + new_args[i] = op->args[i]; + } + } + } + + if (need_cast) { + Expr val = Call::make(float_type, op->name, new_args, op->call_type); + print_expr(simplify(Cast::make(op->type, val))); + } else { + rhs << builtin[op->name] << "("; + for (size_t i = 0; i < op->args.size(); i++) { + if (i > 0) { + rhs << ", "; + } + rhs << print_expr(op->args[i]); + } + rhs << ")"; + print_assignment(op->type, rhs.str()); + } + } +} + +string CodeGen_GLSLBase::print_type(Type type, AppendSpaceIfNeeded space_option) { + ostringstream oss; + type = map_type(type); + if (type.is_scalar()) { + if (type.is_float()) { + oss << "float"; + } else if (type.is_bool()) { + oss << "bool"; + } else if (type.is_int()) { + oss << "int"; + } else if (type.is_uint()) { + oss << "uint"; + } else { + internal_error << "GLSL: invalid type '" << type << "' encountered.\n"; + } + } else { + if (type.is_float()) { + // no prefix for float vectors + } else if (type.is_bool()) { + oss << "b"; + } else if (type.is_int()) { + oss << "i"; + } else if (type.is_uint()) { + oss << "u"; + } else { + internal_error << "GLSL: invalid type '" << type << "' encountered.\n"; + } + oss << "vec" << type.lanes(); + } + + if (space_option == AppendSpace) { + oss << " "; + } + + return oss.str(); +} + +// The following comparisons are defined for ivec and vec +// types, so we don't use call_builtin +void CodeGen_GLSLBase::visit(const EQ *op) { + if (op->type.is_vector()) { + print_expr(Call::make(op->type, "equal", {op->a, op->b}, Call::Extern)); + } else { + CodeGen_C::visit(op); + } +} + +void CodeGen_GLSLBase::visit(const NE *op) { + if (op->type.is_vector()) { + print_expr(Call::make(op->type, "notEqual", {op->a, op->b}, Call::Extern)); + } else { + CodeGen_C::visit(op); + } +} + +void CodeGen_GLSLBase::visit(const LT *op) { + if (op->type.is_vector()) { + print_expr(Call::make(op->type, "lessThan", {op->a, op->b}, Call::Extern)); + } else { + CodeGen_C::visit(op); + } +} + +void CodeGen_GLSLBase::visit(const LE *op) { + if (op->type.is_vector()) { + print_expr(Call::make(op->type, "lessThanEqual", {op->a, op->b}, Call::Extern)); + } else { + CodeGen_C::visit(op); + } +} + +void CodeGen_GLSLBase::visit(const GT *op) { + if (op->type.is_vector()) { + print_expr(Call::make(op->type, "greaterThan", {op->a, op->b}, Call::Extern)); + } else { + CodeGen_C::visit(op); + } +} + +void CodeGen_GLSLBase::visit(const GE *op) { + if (op->type.is_vector()) { + print_expr(Call::make(op->type, "greaterThanEqual", {op->a, op->b}, Call::Extern)); + } else { + CodeGen_C::visit(op); + } +} + +void CodeGen_GLSLBase::visit(const Shuffle *op) { + // The halide Shuffle represents the llvm intrinisc + // shufflevector, however, for GLSL its use is limited to swizzling + // up to a four channel vec type. + + internal_assert(op->vectors.size() == 1); + + int shuffle_lanes = op->type.lanes(); + internal_assert(shuffle_lanes <= 4); + + string expr = print_expr(op->vectors[0]); + + // Create a swizzle expression for the shuffle + string swizzle; + for (int i = 0; i != shuffle_lanes; ++i) { + int channel = op->indices[i]; + internal_assert(channel < 4) << "Shuffle of invalid channel"; + swizzle += get_lane_suffix(channel); + } + + print_assignment(op->type, expr + "." + swizzle); +} + +// Identifiers containing double underscores '__' are reserved in GLSL, so we +// have to use a different name mangling scheme than in the C code generator. +string CodeGen_GLSLBase::print_name(const string &name) { + const string mangled = CodeGen_C::print_name(name); + return replace_all(mangled, "__", "XX"); +} + +void CodeGen_GLSLBase::visit(const Cast *op) { + Type value_type = op->value.type(); + // If both types are represented by the same GLSL type, no explicit cast + // is necessary. + if (map_type(op->type) == map_type(value_type)) { + Expr value = op->value; + if (value_type.code() == Type::Float) { + // float->int conversions may need explicit truncation if an + // integer type is embedded into a float. (Note: overflows are + // considered undefined behavior, so we do nothing about values + // that are out of range of the target type.) + if (op->type.code() == Type::UInt) { + value = simplify(floor(value)); + } else if (op->type.code() == Type::Int) { + value = simplify(trunc(value)); + } + } + // FIXME: Overflow is not UB for most Halide types + // https://github.com/halide/Halide/issues/4975 + value.accept(this); + } else { + Type target_type = map_type(op->type); + print_assignment(target_type, print_type(target_type) + "(" + print_expr(op->value) + ")"); + } +} + class CodeGen_OpenGLCompute_Dev : public CodeGen_GPU_Dev { public: CodeGen_OpenGLCompute_Dev(const Target &target); diff --git a/src/CodeGen_OpenGL_Dev.cpp b/src/CodeGen_OpenGL_Dev.cpp deleted file mode 100644 index 2081fd37b75c..000000000000 --- a/src/CodeGen_OpenGL_Dev.cpp +++ /dev/null @@ -1,1148 +0,0 @@ -#include "CodeGen_OpenGL_Dev.h" -#include "CSE.h" -#include "Debug.h" -#include "Deinterleave.h" -#include "IRMatch.h" -#include "IRMutator.h" -#include "IROperator.h" -#include "Simplify.h" -#include -#include -#include - -namespace Halide { -namespace Internal { - -using std::ostringstream; -using std::string; -using std::vector; - -namespace { - -bool is_opengl_es(const Target &target) { - // TODO: we need a better way to switch between the different OpenGL - // versions (desktop GL, GLES2, GLES3, ...), probably by making it part of - // Target. - return (target.os == Target::Android || - target.os == Target::IOS) || - target.has_feature(Target::EGL); -} - -char get_lane_suffix(int i) { - internal_assert(i >= 0 && i < 4); - return "rgba"[i]; -} - -} // namespace - -CodeGen_OpenGL_Dev::CodeGen_OpenGL_Dev(const Target &target) - : target(target) { - debug(1) << "Creating GLSL codegen\n"; - glc = new CodeGen_GLSL(src_stream, target); -} - -CodeGen_OpenGL_Dev::~CodeGen_OpenGL_Dev() { - delete glc; -} - -void CodeGen_OpenGL_Dev::add_kernel(Stmt s, const string &name, - const vector &args) { - cur_kernel_name = name; - glc->add_kernel(s, name, args); -} - -void CodeGen_OpenGL_Dev::init_module() { - src_stream.str(""); - src_stream.clear(); - cur_kernel_name = ""; -} - -vector CodeGen_OpenGL_Dev::compile_to_src() { - string str = src_stream.str(); - debug(1) << "GLSL source:\n" - << str << "\n"; - vector buffer(str.begin(), str.end()); - buffer.push_back(0); - return buffer; -} - -string CodeGen_OpenGL_Dev::get_current_kernel_name() { - return cur_kernel_name; -} - -void CodeGen_OpenGL_Dev::dump() { - std::cerr << src_stream.str() << "\n"; -} - -string CodeGen_OpenGL_Dev::print_gpu_name(const string &name) { - return glc->print_name(name); -} - -// -// CodeGen_GLSLBase -// -CodeGen_GLSLBase::CodeGen_GLSLBase(std::ostream &s, Target target) - : CodeGen_C(s, target) { - builtin["sin_f32"] = "sin"; - builtin["sqrt_f32"] = "sqrt"; - builtin["cos_f32"] = "cos"; - builtin["exp_f32"] = "exp"; - builtin["log_f32"] = "log"; - builtin["abs_f32"] = "abs"; - builtin["floor_f32"] = "floor"; - builtin["ceil_f32"] = "ceil"; - builtin["asin_f32"] = "asin"; - builtin["acos_f32"] = "acos"; - builtin["tan_f32"] = "tan"; - builtin["atan_f32"] = "atan"; - builtin["atan2_f32"] = "atan"; // also called atan in GLSL - builtin["sinh_f32"] = "sinh"; - builtin["cosh_f32"] = "cosh"; - builtin["tanh_f32"] = "tanh"; - builtin["asinh_f32"] = "asinh"; - builtin["acosh_f32"] = "acosh"; - builtin["atanh_f32"] = "atanh"; - builtin["min"] = "min"; - builtin["max"] = "max"; - builtin["mix"] = "mix"; - builtin["mod"] = "mod"; - builtin["abs"] = "abs"; - builtin["isnan"] = "isnan"; - builtin["round_f32"] = "roundEven"; - builtin["fast_inverse_sqrt_f32"] = "inversesqrt"; - - // functions that produce bvecs - builtin["equal"] = "equal"; - builtin["notEqual"] = "notEqual"; - builtin["lessThan"] = "lessThan"; - builtin["lessThanEqual"] = "lessThanEqual"; - builtin["greaterThan"] = "greaterThan"; - builtin["greaterThanEqual"] = "greaterThanEqual"; -} - -// Maps Halide types to appropriate GLSL types or emit error if no equivalent -// type is available. -Type CodeGen_GLSLBase::map_type(const Type &type) { - Type result = type; - if (type.is_scalar()) { - if (type.is_float()) { - user_assert(type.bits() <= 32) - << "GLSL: Can't represent a float with " << type.bits() << " bits.\n"; - result = Float(32); - } else if (type.is_bool()) { - // unchanged - } else if (type.is_int() && type.bits() <= 32) { - result = Int(32); - } else if (type.is_uint() && type.bits() <= 32) { - if (support_native_uint) { - result = UInt(32); - } else { - if (type.bits() == 32) { - // GLSL <= 120 doesn't have unsigned types, simply use int. - // WARNING: Using int to represent unsigned int may result in - // overflows and undefined behavior. - result = Int(32); - } else { - // Embed all other uints in a GLSL float. Probably not actually - // valid for uint16 on systems with low float precision. - result = Float(32); - } - } - } else { - user_error << "GLSL: Can't represent type '" << type << "'.\n"; - } - } else { - user_assert(type.lanes() <= 4) - << "GLSL: vector types wider than 4 aren't supported\n"; - user_assert(type.is_bool() || type.is_int() || type.is_uint() || type.is_float()) - << "GLSL: Can't represent vector type '" << type << "'.\n"; - Type scalar_type = type.element_of(); - result = map_type(scalar_type).with_lanes(type.lanes()); - } - return result; -} - -void CodeGen_GLSLBase::visit(const FloatImm *op) { - ostringstream oss; - // Print integral numbers with trailing ".0". For fractional numbers use a - // precision of 9 digits, which should be enough to recover the binary - // float unambiguously from the decimal representation (if iostreams - // implements correct rounding). - const float truncated = (op->value < 0 ? std::ceil(op->value) : std::floor(op->value)); - if (truncated == op->value) { - oss << std::fixed << std::setprecision(1) << op->value; - } else { - oss << std::setprecision(9) << op->value; - } - id = oss.str(); -} - -void CodeGen_GLSLBase::visit(const IntImm *op) { - id = print_type(op->type) + "(" + std::to_string(op->value) + ")"; -} - -void CodeGen_GLSLBase::visit(const UIntImm *op) { - if (op->type == Bool()) { - if (op->value == 1) { - id = "true"; - } else { - id = "false"; - } - } else if (support_native_uint) { - id = std::to_string(op->value) + "u"; - } else { - id = print_type(op->type) + "(" + std::to_string(op->value) + ")"; - } -} - -void CodeGen_GLSLBase::visit(const Max *op) { - print_expr(Call::make(op->type, "max", {op->a, op->b}, Call::PureExtern)); -} - -void CodeGen_GLSLBase::visit(const Min *op) { - print_expr(Call::make(op->type, "min", {op->a, op->b}, Call::PureExtern)); -} - -void CodeGen_GLSLBase::visit(const Mod *op) { - if (op->type.is_int() || op->type.is_uint()) { - // Just exploit the Euclidean identity - // FIXME: Why doesn't lower_euclidean_mod work for glsl? - // https://github.com/halide/Halide/issues/4979 - Expr zero = make_zero(op->type); - Expr equiv = select(op->a == zero, zero, - op->a - (op->a / op->b) * op->b); - equiv = common_subexpression_elimination(equiv); - print_expr(equiv); - } else { - print_expr(Call::make(op->type, "mod", {op->a, op->b}, Call::Extern)); - } -} - -void CodeGen_GLSLBase::visit(const Call *op) { - if (op->is_intrinsic(Call::lerp)) { - // Implement lerp using GLSL's mix() function, which always uses - // floating point arithmetic. - Expr zero_val = op->args[0]; - Expr one_val = op->args[1]; - Expr weight = op->args[2]; - - internal_assert(weight.type().is_uint() || weight.type().is_float()); - if (weight.type().is_uint()) { - // Normalize integer weights to [0.0f, 1.0f] range. - internal_assert(weight.type().bits() < 32); - weight = Div::make(Cast::make(Float(32), weight), - Cast::make(Float(32), weight.type().max())); - } else if (op->type.is_uint()) { - // Round float weights down to next multiple of (1/op->type.imax()) - // to give same results as lerp based on integer arithmetic. - internal_assert(op->type.bits() < 32); - weight = floor(weight * op->type.max()) / op->type.max(); - } - - Type result_type = Float(32, op->type.lanes()); - Expr e = Call::make(result_type, "mix", {zero_val, one_val, weight}, Call::Extern); - - if (!op->type.is_float()) { - // Mirror rounding implementation of Halide's integer lerp. - e = Cast::make(op->type, floor(e + 0.5f)); - } - print_expr(e); - return; - } else if (op->is_intrinsic(Call::absd)) { - internal_assert(op->args.size() == 2); - Expr a = op->args[0]; - Expr b = op->args[1]; - Expr e = cast(op->type, select(a < b, b - a, a - b)); - print_expr(e); - return; - } else if (op->is_intrinsic(Call::return_second)) { - internal_assert(op->args.size() == 2); - // Simply discard the first argument, which is generally a call to - // 'halide_printf'. - print_assignment(op->type, print_expr(op->args[1])); - return; - } else if (op->name == "fast_inverse_f32") { - print_expr(make_one(op->type) / op->args[0]); - return; - } else if (op->name == "fast_inverse_sqrt_f32") { - print_expr(make_one(op->type) / sqrt(op->args[0])); - return; - } else if (op->name == "pow_f32") { - if (can_prove(op->args[0] > 0)) { - ostringstream rhs; - rhs << "pow(" << print_expr(op->args[0]) << ", " << print_expr(op->args[1]) << ")"; - print_assignment(op->type, rhs.str()); - return; - } else { - ostringstream base; - string a = print_expr(op->args[0]); - string b = print_expr(op->args[1]); - base << "pow(abs(" << a << "), " << b << ")"; - string c = print_assignment(op->type, base.str()); - Expr a_var = is_const(op->args[0]) ? op->args[0] : Variable::make(op->type, a); - Expr b_var = is_const(op->args[1]) ? op->args[1] : Variable::make(op->type, b); - Expr c_var = Variable::make(op->type, c); - // OpenGL isn't required to produce NaNs, so we return - // zero in the undefined case. - Expr equiv = select(a_var > 0 || b_var % 2 == 0, c_var, - b_var % 2 == 1, -c_var, - 0.0f); - print_expr(simplify(equiv)); - return; - } - } else if (op->is_intrinsic(Call::shift_right)) { - print_assignment(op->type, print_expr(op->args[0]) + " >> " + print_expr(op->args[1])); - } else if (op->is_intrinsic(Call::shift_left)) { - print_assignment(op->type, print_expr(op->args[0]) + " << " + print_expr(op->args[1])); - } else if (op->is_intrinsic(Call::bitwise_not)) { - print_assignment(op->type, "~" + print_expr(op->args[0])); - } else if (op->is_intrinsic(Call::bitwise_and)) { - print_assignment(op->type, print_expr(op->args[0]) + " & " + print_expr(op->args[1])); - } else if (op->is_intrinsic(Call::bitwise_or)) { - print_assignment(op->type, print_expr(op->args[0]) + " | " + print_expr(op->args[1])); - } else if (op->is_intrinsic(Call::bitwise_xor)) { - print_assignment(op->type, print_expr(op->args[0]) + " ^ " + print_expr(op->args[1])); - } else if (op->is_intrinsic(Call::div_round_to_zero)) { - print_assignment(op->type, print_expr(op->args[0]) + " / " + print_expr(op->args[1])); - } else if (op->is_intrinsic(Call::mod_round_to_zero)) { - print_assignment(op->type, print_expr(op->args[0]) + " % " + print_expr(op->args[1])); - } else { - ostringstream rhs; - if (builtin.count(op->name) == 0) { - user_error << "GLSL: unknown function '" << op->name << "' encountered.\n"; - } - - bool need_cast = false; - const Type float_type = Float(32, op->type.lanes()); - vector new_args(op->args.size()); - - // For GL 2.0, Most GLSL builtins are only defined for float arguments, - // so we may have to introduce type casts around the arguments and the - // entire function call. - if (!support_int_to_float_implicit_conversion && - !support_non_float_type_builtin.count(op->name)) { - need_cast = !op->type.is_float(); - for (size_t i = 0; i < op->args.size(); i++) { - if (!op->args[i].type().is_float()) { - new_args[i] = Cast::make(float_type, op->args[i]); - need_cast = true; - } else { - new_args[i] = op->args[i]; - } - } - } - - if (need_cast) { - Expr val = Call::make(float_type, op->name, new_args, op->call_type); - print_expr(simplify(Cast::make(op->type, val))); - } else { - rhs << builtin[op->name] << "("; - for (size_t i = 0; i < op->args.size(); i++) { - if (i > 0) { - rhs << ", "; - } - rhs << print_expr(op->args[i]); - } - rhs << ")"; - print_assignment(op->type, rhs.str()); - } - } -} - -string CodeGen_GLSLBase::print_type(Type type, AppendSpaceIfNeeded space_option) { - ostringstream oss; - type = map_type(type); - if (type.is_scalar()) { - if (type.is_float()) { - oss << "float"; - } else if (type.is_bool()) { - oss << "bool"; - } else if (type.is_int()) { - oss << "int"; - } else if (type.is_uint()) { - oss << "uint"; - } else { - internal_error << "GLSL: invalid type '" << type << "' encountered.\n"; - } - } else { - if (type.is_float()) { - // no prefix for float vectors - } else if (type.is_bool()) { - oss << "b"; - } else if (type.is_int()) { - oss << "i"; - } else if (type.is_uint()) { - oss << "u"; - } else { - internal_error << "GLSL: invalid type '" << type << "' encountered.\n"; - } - oss << "vec" << type.lanes(); - } - - if (space_option == AppendSpace) { - oss << " "; - } - - return oss.str(); -} - -// The following comparisons are defined for ivec and vec -// types, so we don't use call_builtin -void CodeGen_GLSLBase::visit(const EQ *op) { - if (op->type.is_vector()) { - print_expr(Call::make(op->type, "equal", {op->a, op->b}, Call::Extern)); - } else { - CodeGen_C::visit(op); - } -} - -void CodeGen_GLSLBase::visit(const NE *op) { - if (op->type.is_vector()) { - print_expr(Call::make(op->type, "notEqual", {op->a, op->b}, Call::Extern)); - } else { - CodeGen_C::visit(op); - } -} - -void CodeGen_GLSLBase::visit(const LT *op) { - if (op->type.is_vector()) { - print_expr(Call::make(op->type, "lessThan", {op->a, op->b}, Call::Extern)); - } else { - CodeGen_C::visit(op); - } -} - -void CodeGen_GLSLBase::visit(const LE *op) { - if (op->type.is_vector()) { - print_expr(Call::make(op->type, "lessThanEqual", {op->a, op->b}, Call::Extern)); - } else { - CodeGen_C::visit(op); - } -} - -void CodeGen_GLSLBase::visit(const GT *op) { - if (op->type.is_vector()) { - print_expr(Call::make(op->type, "greaterThan", {op->a, op->b}, Call::Extern)); - } else { - CodeGen_C::visit(op); - } -} - -void CodeGen_GLSLBase::visit(const GE *op) { - if (op->type.is_vector()) { - print_expr(Call::make(op->type, "greaterThanEqual", {op->a, op->b}, Call::Extern)); - } else { - CodeGen_C::visit(op); - } -} - -void CodeGen_GLSLBase::visit(const Shuffle *op) { - // The halide Shuffle represents the llvm intrinisc - // shufflevector, however, for GLSL its use is limited to swizzling - // up to a four channel vec type. - - internal_assert(op->vectors.size() == 1); - - int shuffle_lanes = op->type.lanes(); - internal_assert(shuffle_lanes <= 4); - - string expr = print_expr(op->vectors[0]); - - // Create a swizzle expression for the shuffle - string swizzle; - for (int i = 0; i != shuffle_lanes; ++i) { - int channel = op->indices[i]; - internal_assert(channel < 4) << "Shuffle of invalid channel"; - swizzle += get_lane_suffix(channel); - } - - print_assignment(op->type, expr + "." + swizzle); -} - -// Identifiers containing double underscores '__' are reserved in GLSL, so we -// have to use a different name mangling scheme than in the C code generator. -string CodeGen_GLSLBase::print_name(const string &name) { - const string mangled = CodeGen_C::print_name(name); - return replace_all(mangled, "__", "XX"); -} - -void CodeGen_GLSLBase::visit(const Cast *op) { - Type value_type = op->value.type(); - // If both types are represented by the same GLSL type, no explicit cast - // is necessary. - if (map_type(op->type) == map_type(value_type)) { - Expr value = op->value; - if (value_type.code() == Type::Float) { - // float->int conversions may need explicit truncation if an - // integer type is embedded into a float. (Note: overflows are - // considered undefined behavior, so we do nothing about values - // that are out of range of the target type.) - if (op->type.code() == Type::UInt) { - value = simplify(floor(value)); - } else if (op->type.code() == Type::Int) { - value = simplify(trunc(value)); - } - } - // FIXME: Overflow is not UB for most Halide types - // https://github.com/halide/Halide/issues/4975 - value.accept(this); - } else { - Type target_type = map_type(op->type); - print_assignment(target_type, print_type(target_type) + "(" + print_expr(op->value) + ")"); - } -} - -// -// CodeGen_GLSL -// - -CodeGen_GLSL::CodeGen_GLSL(std::ostream &s, const Target &t) - : CodeGen_GLSLBase(s, t) { - builtin["trunc_f32"] = "_trunc_f32"; - - // TODO: Add emulation for these builtin functions - // which are available only for GL 3.x (GLSL >= 130) - builtin.erase("isnan"); - builtin.erase("round_f32"); - builtin.erase("sinh_f32"); - builtin.erase("cosh_f32"); - builtin.erase("tanh_f32"); - builtin.erase("asinh_f32"); - builtin.erase("acosh_f32"); - builtin.erase("atanh_f32"); - - // TODO: Check OpenGL version then determine support_* variables value - support_native_uint = false; - support_int_to_float_implicit_conversion = false; - support_integer_division_rounding = false; - // functions that support ivecs - support_non_float_type_builtin.insert("equal"); - support_non_float_type_builtin.insert("notEqual"); - support_non_float_type_builtin.insert("lessThan"); - support_non_float_type_builtin.insert("lessThanEqual"); - support_non_float_type_builtin.insert("greaterThan"); - support_non_float_type_builtin.insert("greaterThanEqual"); -} - -// Copy back from commit #60442cf9eb -void CodeGen_GLSL::visit(const Div *op) { - if (!support_integer_division_rounding && (op->type.is_int() || op->type.is_uint())) { - // Halide's integer division is defined to round according to - // the sign of the denominator. Since the rounding behavior of - // GLSL's integer division is undefined, emulate the correct - // behavior using floating point arithmetic. - Type float_type = Float(32, op->type.lanes()); - // To avoid rounding woes, aim for a floating point value that - // should not be close to an integer. If we divide the range - // [0, 1, 2, 3] by 4, we want to get floating point values - // [1/8, 3/8, 5/8, 7/8]. This can be achieved by adding 0.5 to - // the numerator. - Expr val = Div::make(Cast::make(float_type, op->a) + 0.5f, Cast::make(float_type, op->b)); - string float_result = print_expr(simplify(val)); - val = Variable::make(float_type, float_result); - Expr zero = make_zero(op->type); - string a = print_expr(op->a); - string b = print_expr(op->b); - Expr a_var = is_const(op->a) ? op->a : Variable::make(op->type, a); - Expr b_var = is_const(op->b) ? op->b : Variable::make(op->type, b); - Expr equiv = select(b_var == zero, zero, - b_var > zero, Call::make(op->type, "floor_f32", {val}, Call::Extern), - Call::make(op->type, "ceil_f32", {val}, Call::Extern)); - if (op->type.bits() >= 32) { - // A float isn't precise enough to produce the correct int - // in the case where the denominator is one. - equiv = select(b_var == make_one(op->type), a_var, equiv); - } - print_expr(simplify(equiv)); - } else { - CodeGen_GLSLBase::visit(op); - } -} - -void CodeGen_GLSL::visit(const Let *op) { - - if (op->name.find(".varying") != string::npos) { - - // Skip let statements for varying attributes - op->body.accept(this); - - return; - } - - CodeGen_C::visit(op); -} - -void CodeGen_GLSL::visit(const For *loop) { - user_assert(loop->for_type != ForType::GPULane) - << "The GLSL backend does not support the gpu_lanes() scheduling directive."; - - if (ends_with(loop->name, ".__block_id_x") || - ends_with(loop->name, ".__block_id_y")) { - internal_assert(loop->for_type == ForType::GPUBlock) - << "kernel loop must be gpu block\n"; - - debug(1) << "Dropping loop " << loop->name << " (" << loop->min << ", " << loop->extent << ")\n"; - - string idx; - if (ends_with(loop->name, ".__block_id_x")) { - idx = "int(_varyingf0[0])"; - } else if (ends_with(loop->name, ".__block_id_y")) { - idx = "int(_varyingf0[1])"; - } - stream << get_indent() << print_type(Int(32)) << " " << print_name(loop->name) << " = " << idx << ";\n"; - loop->body.accept(this); - } else { - user_assert(loop->for_type != ForType::Parallel) << "GLSL: parallel loops aren't allowed inside kernel.\n"; - CodeGen_C::visit(loop); - } -} - -vector evaluate_vector_select(const Select *op) { - const int lanes = op->type.lanes(); - vector result(lanes); - for (int i = 0; i < lanes; i++) { - Expr cond = extract_lane(op->condition, i); - Expr true_value = extract_lane(op->true_value, i); - Expr false_value = extract_lane(op->false_value, i); - - if (is_const(cond)) { - result[i] = is_const_one(cond) ? true_value : false_value; - } else { - result[i] = Select::make(cond, true_value, false_value); - } - } - return result; -} - -void CodeGen_GLSL::visit(const Select *op) { - string id_value; - if (op->condition.type().is_scalar()) { - id_value = unique_name('_'); - stream << get_indent() << print_type(op->type) << " " << id_value << ";\n"; - string cond = print_expr(op->condition); - stream << get_indent() << "if (" << cond << ") "; - open_scope(); - { - string true_val = print_expr(op->true_value); - stream << get_indent() << id_value << " = " << true_val << ";\n"; - } - close_scope(""); - - stream << get_indent() << "else "; - open_scope(); - { - string false_val = print_expr(op->false_value); - stream << get_indent() << id_value << " = " << false_val << ";\n"; - } - close_scope(""); - } else { - // Selects with vector conditions are typically used for constructing - // vector types. If the select condition can be evaluated at - // compile-time (which is often the case), we can built the vector - // directly without lowering to a sequence of "if" statements. - internal_assert(op->condition.type().lanes() == op->type.lanes()); - int lanes = op->type.lanes(); - vector result = evaluate_vector_select(op); - vector ids(lanes); - for (int i = 0; i < lanes; i++) { - ids[i] = print_expr(result[i]); - } - id_value = unique_name('_'); - stream << get_indent() << print_type(op->type) << " " << id_value << " = " - << print_type(op->type) << "("; - for (int i = 0; i < lanes; i++) { - stream << ids[i] << ((i < lanes - 1) ? ", " : ");\n"); - } - } - - id = id_value; -} - -string CodeGen_GLSL::get_vector_suffix(const Expr &e) { - vector matches; - Expr w = Variable::make(Int(32), "*"); - - // The vectorize pass will insert a ramp in the color dimension argument. - const Ramp *r = e.as(); - if (r && is_const_zero(r->base) && is_const_one(r->stride) && r->lanes == 4) { - // No suffix is needed when accessing a full RGBA vector. - return ""; - } else if (r && is_const_zero(r->base) && is_const_one(r->stride) && r->lanes == 3) { - return ".rgb"; - } else if (r && is_const_zero(r->base) && is_const_one(r->stride) && r->lanes == 2) { - return ".rg"; - } else { - // GLSL 1.0 Section 5.5 supports subscript based vector indexing - internal_assert(e.type().is_scalar()); - string id = print_expr(e); - if (e.type() != Int(32)) { - id = "int(" + id + ")"; - } - return string("[" + id + "]"); - } -} - -vector CodeGen_GLSL::print_lanes(const Expr &e) { - int l = e.type().lanes(); - internal_assert(e.type().is_vector()); - vector result(l); - if (const Broadcast *b = e.as()) { - string val = print_expr(b->value); - for (int i = 0; i < l; i++) { - result[i] = val; - } - } else if (const Ramp *r = e.as()) { - for (int i = 0; i < l; i++) { - result[i] = print_expr(simplify(r->base + i * r->stride)); - } - } else { - string val = print_expr(e); - for (int i = 0; i < l; i++) { - result[i] = val + "[" + std::to_string(i) + "]"; - } - } - return result; -} - -void CodeGen_GLSL::visit(const Load *op) { - user_assert(is_const_one(op->predicate)) << "GLSL: predicated load is not supported.\n"; - if (scalar_vars.contains(op->name)) { - internal_assert(is_const_zero(op->index)); - id = print_name(op->name); - } else if (vector_vars.contains(op->name)) { - id = print_name(op->name) + get_vector_suffix(op->index); - } else if (op->type.is_scalar()) { - string idx = print_expr(op->index); - print_assignment(op->type, print_name(op->name) + "[" + idx + "]"); - } else { - vector indices = print_lanes(op->index); - ostringstream rhs; - rhs << print_type(op->type) << "("; - for (int i = 0; i < op->type.lanes(); i++) { - if (i > 0) { - rhs << ", "; - } - rhs << print_name(op->name) << "[" + indices[i] + "]"; - } - rhs << ")"; - print_assignment(op->type, rhs.str()); - } -} - -void CodeGen_GLSL::visit(const Store *op) { - user_assert(is_const_one(op->predicate)) << "GLSL: predicated store is not supported.\n"; - if (scalar_vars.contains(op->name)) { - internal_assert(is_const_zero(op->index)); - string val = print_expr(op->value); - stream << get_indent() << print_name(op->name) << " = " << val << ";\n"; - } else if (vector_vars.contains(op->name)) { - string val = print_expr(op->value); - stream << get_indent() << print_name(op->name) << get_vector_suffix(op->index) - << " = " << val << ";\n"; - } else if (op->value.type().is_scalar()) { - string val = print_expr(op->value); - string idx = print_expr(op->index); - stream << get_indent() << print_name(op->name) << "[" << idx << "] = " << val << ";\n"; - } else { - vector indices = print_lanes(op->index); - vector values = print_lanes(op->value); - for (int i = 0; i < op->value.type().lanes(); i++) { - stream << get_indent() << print_name(op->name) - << "[" << indices[i] << "] = " - << values[i] << ";\n"; - } - } -} - -void CodeGen_GLSL::visit(const Evaluate *op) { - print_expr(op->value); -} - -namespace { -class AllAccessConstant : public IRVisitor { - using IRVisitor::visit; - - void visit(const Load *op) override { - if (op->name == buf && !is_const(op->index)) { - result = false; - } - IRVisitor::visit(op); - } - - void visit(const Store *op) override { - if (op->name == buf && !is_const(op->index)) { - result = false; - } - IRVisitor::visit(op); - } - -public: - bool result = true; - string buf; -}; -} // namespace - -void CodeGen_GLSL::visit(const Allocate *op) { - int32_t size = op->constant_allocation_size(); - user_assert(size) << "Allocations inside GLSL kernels must be constant-sized\n"; - - // Check if all access to the allocation uses a constant index - AllAccessConstant all_access_constant; - all_access_constant.buf = op->name; - op->body.accept(&all_access_constant); - - stream << get_indent(); - if (size == 1) { - // We can use a variable - stream << print_type(op->type) << " " << print_name(op->name) << ";\n"; - ScopedBinding p(scalar_vars, op->name, 0); - op->body.accept(this); - } else if (size <= 4 && all_access_constant.result) { - // We can just use a vector variable - stream << print_type(op->type.with_lanes(size)) << " " << print_name(op->name) << ";\n"; - ScopedBinding p(vector_vars, op->name, 0); - op->body.accept(this); - } else { - stream << print_type(op->type) << " " << print_name(op->name) << "[" << size << "];\n"; - op->body.accept(this); - } -} - -void CodeGen_GLSL::visit(const Free *op) { -} - -void CodeGen_GLSL::visit(const AssertStmt *) { - internal_error << "GLSL: unexpected Assertion node encountered.\n"; -} - -void CodeGen_GLSL::visit(const Ramp *op) { - ostringstream rhs; - rhs << print_type(op->type) << "("; - - if (op->lanes > 4) { - internal_error << "GLSL: ramp lanes " << op->lanes << " is not supported\n"; - } - - rhs << print_expr(op->base); - - for (int i = 1; i < op->lanes; ++i) { - rhs << ", " << print_expr(Add::make(op->base, Mul::make(i, op->stride))); - } - - rhs << ")"; - print_assignment(op->type, rhs.str()); -} - -void CodeGen_GLSL::visit(const Broadcast *op) { - ostringstream rhs; - rhs << print_type(op->type) << "(" << print_expr(op->value) << ")"; - print_assignment(op->type, rhs.str()); -} - -void CodeGen_GLSL::visit(const Atomic *op) { - // Floating point atomics can be tricky as there are no floating point atomics - // operations, and GLSL does not allow converting a floating point buffer to an - // integer buffer. - // Plus, OpenGL supports atomics starting from 4.3, but Halide doesn't distinguish - // between OpenGL versions yet. - user_assert(false) << "GLSL: atomics are not supported.\n"; -} - -void CodeGen_GLSL::add_kernel(const Stmt &stmt, const string &name, - const vector &args) { - - // This function produces fragment shader source for the halide statement. - // The corresponding vertex shader will be generated by the halide opengl - // runtime based on the arguments passed in comments below. Host codegen - // outputs expressions that are evaluated at runtime to produce vertex data - // and varying attribute values at the vertices. - - // Emit special header that declares the kernel name and its arguments. - // There is currently no standard way of passing information from the code - // generator to the runtime, and the information Halide passes to the - // runtime are fairly limited. We use these special comments to know the - // data types of arguments and whether textures are used for input or - // output. - - // Keep track of the number of uniform and varying attributes - int num_uniform_floats = 0; - int num_uniform_ints = 0; - - // The spatial x and y coordinates are always passed in the first two - // varying float attribute slots - int num_varying_floats = 2; - - ostringstream header; - header << "/// KERNEL " << name << "\n"; - for (size_t i = 0; i < args.size(); i++) { - if (args[i].is_buffer) { - Type t = args[i].type.element_of(); - - user_assert(args[i].read != args[i].write) << "GLSL: buffers may only be read OR written inside a kernel loop.\n"; - string type_name; - if (t == UInt(8)) { - type_name = "uint8_t"; - } else if (t == UInt(16)) { - type_name = "uint16_t"; - } else if (t == Float(32)) { - type_name = "float"; - } else { - user_error << "GLSL: buffer " << args[i].name << " has invalid type " << t << ".\n"; - } - header << "/// " << (args[i].read ? "IN_BUFFER " : "OUT_BUFFER ") - << type_name << " " << print_name(args[i].name) << "\n"; - } else if (ends_with(args[i].name, ".varying")) { - header << "/// VARYING " - // GLSL requires that varying attributes are float. Integer - // expressions for vertex attributes are cast to float during - // host codegen - << "float " << print_name(args[i].name) << " varyingf" << args[i].packed_index / 4 << "[" << args[i].packed_index % 4 << "]\n"; - ++num_varying_floats; - } else if (args[i].type.is_float()) { - header << "/// UNIFORM " - << CodeGen_C::print_type(args[i].type) << " " // NOLINT: Allow call to CodeGen_C::print_type - << print_name(args[i].name) << " uniformf" << args[i].packed_index / 4 << "[" << args[i].packed_index % 4 << "]\n"; - ++num_uniform_floats; - } else if (args[i].type.is_int()) { - header << "/// UNIFORM " - << CodeGen_C::print_type(args[i].type) << " " // NOLINT: Allow call to CodeGen_C::print_type - << print_name(args[i].name) << " uniformi" << args[i].packed_index / 4 << "[" << args[i].packed_index % 4 << "]\n"; - ++num_uniform_ints; - } - } - - // Compute the number of vec4's needed to pack the arguments - num_varying_floats = (num_varying_floats + 3) / 4; - num_uniform_floats = (num_uniform_floats + 3) / 4; - num_uniform_ints = (num_uniform_ints + 3) / 4; - - stream << header.str(); - - // Specify default float precision when compiling for OpenGL ES. - // TODO: emit correct #version - if (is_opengl_es(target)) { - stream << "#ifdef GL_FRAGMENT_PRECISION_HIGH\n" - << "precision highp float;\n" - << "#endif\n"; - } - - // Declare input textures and variables - for (size_t i = 0; i < args.size(); i++) { - if (args[i].is_buffer && args[i].read) { - stream << "uniform sampler2D " << print_name(args[i].name) << ";\n"; - } - } - - for (int i = 0; i != num_varying_floats; ++i) { - stream << "varying vec4 _varyingf" << i << ";\n"; - } - - for (int i = 0; i != num_uniform_floats; ++i) { - stream << "uniform vec4 _uniformf" << i << ";\n"; - } - - for (int i = 0; i != num_uniform_ints; ++i) { - stream << "uniform ivec4 _uniformi" << i << ";\n"; - } - - // Output additional builtin functions. - stream << "float _trunc_f32(float x) {\n" - " return floor(abs(x)) * sign(x);\n" - "}\n"; - - stream << "void main() {\n"; - indent += 2; - - // Unpack the uniform and varying parameters - for (size_t i = 0; i < args.size(); i++) { - if (args[i].is_buffer) { - continue; - } else if (ends_with(args[i].name, ".varying")) { - stream << get_indent() << "float " << print_name(args[i].name) - << " = _varyingf" << args[i].packed_index / 4 - << "[" << args[i].packed_index % 4 << "];\n"; - } else if (args[i].type.is_float()) { - stream << get_indent() << print_type(args[i].type) << " " - << print_name(args[i].name) - << " = _uniformf" << args[i].packed_index / 4 - << "[" << args[i].packed_index % 4 << "];\n"; - } else if (args[i].type.is_int()) { - stream << get_indent() << print_type(args[i].type) << " " - << print_name(args[i].name) - << " = _uniformi" << args[i].packed_index / 4 - << "[" << args[i].packed_index % 4 << "];\n"; - } - } - - print(stmt); - indent -= 2; - stream << "}\n"; -} - -namespace { -// Replace all temporary variables names like _1234 with '$'. This is done to -// make the individual tests below self-contained. -string normalize_temporaries(const string &s) { - string result; - for (size_t i = 0; i < s.size();) { - if (s[i] == '_') { - result += '$'; - for (i++; i < s.size() && isdigit(s[i]); i++) { - } - } else { - result += s[i++]; - } - } - return result; -} - -void check(Expr e, const string &result) { - ostringstream source; - CodeGen_GLSL cg(source, Target()); - if (e.as() || e.as()) { - // Hack: CodeGen_C doesn't treat immediates like other expressions, so - // wrap them to obtain useful output. - e = Halide::print(e); - } - source.str(""); - source.clear(); - Evaluate::make(e).accept(&cg); - string src = normalize_temporaries(source.str()); - if (!ends_with(src, result)) { - internal_error - << "Codegen failed for " << e << "\n" - << " Correct source code:\n" - << result - << " Actual source code:\n" - << src; - } -} - -} // namespace - -void CodeGen_GLSL::test() { - vector e; - - // Check that float constants are printed correctly. - check(1.0f, "float $ = 1.0;\n"); - check(1.0f + std::numeric_limits::epsilon(), "float $ = 1.00000012;\n"); - check(1.19209290e-07f, "float $ = 1.1920929e-07;\n"); - check(8388608.f, "float $ = 8388608.0;\n"); - check(-2.1e19f, "float $ = -20999999189405401088.0;\n"); - check(3.1415926536f, "float $ = 3.14159274;\n"); - - // Uint8 is embedded in GLSL floats, so no cast necessary - check(cast(Variable::make(UInt(8), "x") * 1.0f), - "float $ = $x * 1.0;\n"); - // But truncation is necessary for the reverse direction - check(cast(Variable::make(Float(32), "x")), - "float $ = floor($x);\n"); - - check(Min::make(Expr(1), Expr(5)), - "float $ = min(1.0, 5.0);\n" - "int $ = int($);\n"); - - check(Max::make(Expr(1), Expr(5)), - "float $ = max(1.0, 5.0);\n" - "int $ = int($);\n"); - - check(Max::make(Broadcast::make(1, 4), Broadcast::make(5, 4)), - "vec4 $ = vec4(1.0);\n" - "vec4 $ = vec4(5.0);\n" - "vec4 $ = max($, $);\n" - "ivec4 $ = ivec4($);\n"); - - check(Variable::make(Int(32), "x") / Expr(3), - "float $ = float($x);\n" - "float $ = $ * 0.333333343;\n" - "float $ = $ + 0.166666672;\n" - "float $ = floor($);\n" - "int $ = int($);\n"); - // check(Variable::make(Int(32, 4), "x") / Variable::make(Int(32, 4), "y"), - // "vec4 $ = vec4($x);\n" - // "vec4 $ = vec4($y);\n" - // "vec4 $ = $ / $;\n" - // "vec4 $ = floor($);\n" - // "ivec4 $ = ivec4($);\n"); - check(Variable::make(Float(32, 4), "x") / Variable::make(Float(32, 4), "y"), - "vec4 $ = $x / $y;\n"); - - // Integer lerp with integer weight - check(lerp(cast(0), cast(255), cast(127)), - "float $ = mix(0.0, 255.0, 0.498039216);\n" - "float $ = $ + 0.5;\n" - "float $ = floor($);\n"); - - // Integer lerp with float weight - check(lerp(cast(0), cast(255), 0.3f), - "float $ = mix(0.0, 255.0, 0.298039228);\n" - "float $ = $ + 0.5;\n" - "float $ = floor($);\n"); - - // Floating point lerp - check(lerp(0.0f, 1.0f, 0.3f), - "float $ = mix(0.0, 1.0, 0.300000012);\n"); - - // Vectorized lerp - check(lerp(Variable::make(Float(32, 4), "x"), Variable::make(Float(32, 4), "y"), Broadcast::make(0.25f, 4)), - "vec4 $ = vec4(0.25);\n" - "vec4 $ = mix($x, $y, $);\n"); - - // Sin with scalar arg - check(sin(3.0f), "float $ = sin(3.0);\n"); - - // Sin with vector arg - check(Call::make(Float(32, 4), "sin_f32", {Broadcast::make(1.f, 4)}, Internal::Call::Extern), - "vec4 $ = vec4(1.0);\n" - "vec4 $ = sin($);\n"); - - // use float version of abs in GLSL - check(abs(Variable::make(Int(32), "x")), - "float $ = float($x);\n" - "float $ = abs($);\n" - "int $ = int($);\n"); - - check(Halide::print(3.0f), "float $ = 3.0;\n"); - - // Test rounding behavior of integer division. - // The latest version of integer division is too complicated to list here - // check(Variable::make(Int(32), "x") / Variable::make(Int(32), "y"), - // "float $ = float($x);\n" - // "float $ = float($y);\n" - // "float $ = $ / $;\n" - // "float $ = floor($);\n" - // "int $ = int($);\n"); - - // Select with scalar condition - check(Select::make(EQ::make(Variable::make(Float(32), "x"), 1.0f), - Broadcast::make(1.f, 4), - Broadcast::make(2.f, 4)), - "vec4 $;\n" - "bool $ = $x == 1.0;\n" - "if ($) {\n" - " vec4 $ = vec4(1.0);\n" - " $ = $;\n" - "}\n" - "else {\n" - " vec4 $ = vec4(2.0);\n" - " $ = $;\n" - "}\n"); - - // Select with vector condition - check(Select::make(EQ::make(Ramp::make(-1, 1, 4), Broadcast::make(0, 4)), - Broadcast::make(1.f, 4), - Broadcast::make(2.f, 4)), - "vec4 $ = vec4(2.0, 1.0, 2.0, 2.0);\n"); - - check(log(1.0f), "float $ = log(1.0);\n"); - check(exp(1.0f), "float $ = exp(1.0);\n"); - - // Integer powers are expanded - check(pow(1.4f, 2), "float $ = 1.39999998 * 1.39999998;\n"); - check(pow(1.0f, 2.1f), "float $ = pow(1.0, 2.0999999);\n"); - - std::cout << "CodeGen_GLSL test Success!\n"; -} - -} // namespace Internal -} // namespace Halide diff --git a/src/CodeGen_OpenGL_Dev.h b/src/CodeGen_OpenGL_Dev.h deleted file mode 100644 index b180b5e0ef12..000000000000 --- a/src/CodeGen_OpenGL_Dev.h +++ /dev/null @@ -1,155 +0,0 @@ -#ifndef HALIDE_CODEGEN_OPENGL_DEV_H -#define HALIDE_CODEGEN_OPENGL_DEV_H - -/** \file - * Defines the code-generator for producing GLSL kernel code - */ - -#include -#include -#include - -#include "CodeGen_C.h" -#include "CodeGen_GPU_Dev.h" -#include "Target.h" - -namespace Halide { -namespace Internal { - -class CodeGen_GLSL; - -class CodeGen_OpenGL_Dev : public CodeGen_GPU_Dev { -public: - CodeGen_OpenGL_Dev(const Target &target); - ~CodeGen_OpenGL_Dev() override; - - // CodeGen_GPU_Dev interface - void add_kernel(Stmt stmt, const std::string &name, - const std::vector &args) override; - - void init_module() override; - - std::vector compile_to_src() override; - - std::string get_current_kernel_name() override; - - void dump() override; - - std::string api_unique_name() override { - return "opengl"; - } - -private: - CodeGen_GLSL *glc; - - std::string print_gpu_name(const std::string &name) override; - - std::ostringstream src_stream; - std::string cur_kernel_name; - Target target; -}; - -/** - * This class handles GLSL arithmetic, shared by CodeGen_GLSL and CodeGen_OpenGLCompute_C. - */ -class CodeGen_GLSLBase : public CodeGen_C { -public: - CodeGen_GLSLBase(std::ostream &s, Target t); - - std::string print_name(const std::string &name) override; - std::string print_type(Type type, AppendSpaceIfNeeded space_option = DoNotAppendSpace) override; - -protected: - using CodeGen_C::visit; - - void visit(const Cast *) override; - - void visit(const FloatImm *) override; - void visit(const UIntImm *) override; - void visit(const IntImm *) override; - - void visit(const Max *op) override; - void visit(const Min *op) override; - void visit(const Call *op) override; - - void visit(const Mod *) override; - - // these have specific functions - // in GLSL that operate on vectors - void visit(const EQ *) override; - void visit(const NE *) override; - void visit(const LT *) override; - void visit(const LE *) override; - void visit(const GT *) override; - void visit(const GE *) override; - - void visit(const Shuffle *) override; - - Type map_type(const Type &); - - std::map builtin; - - // empty for GL 3.x and GLCompute which do not care about this (due to implicit conversion) - // while GL 2.0 only support a small subset of builtin functions with ivec arguments - std::set support_non_float_type_builtin; - - // true for GL 3.x (GLSL >= 130 or ESSL >= 300) and GLCompute - // false for GL 2.x which does not support uint/uvec - bool support_native_uint = true; - - // true for GL 2.1 and 3.x (GLSL == 120, >= 130) and GLCompute - // true for GL ES 3.1 with EXT_shader_implicit_conversions - // false for GL 2.0 and GL ES 3.0 - bool support_int_to_float_implicit_conversion = true; - - // it seems that only GLSL ES implicitly does not support rounding of integer division - // while GLSL specification does not talk about this issue - // see GLSL ES Specification 1.00, issues 10.28, Rounding of Integer Division - // see GLSL ES Specification 3.00, issues 12.33, Rounding of Integer Division - bool support_integer_division_rounding = true; -}; - -/** Compile one statement into GLSL. */ -class CodeGen_GLSL : public CodeGen_GLSLBase { -public: - CodeGen_GLSL(std::ostream &s, const Target &t); - - void add_kernel(const Stmt &stmt, - const std::string &name, - const std::vector &args); - - static void test(); - -protected: - using CodeGen_GLSLBase::visit; - - void visit(const Div *) override; - - void visit(const Let *) override; - void visit(const For *) override; - void visit(const Select *) override; - - void visit(const Load *) override; - void visit(const Store *) override; - void visit(const Allocate *) override; - void visit(const Free *) override; - - void visit(const AssertStmt *) override; - void visit(const Ramp *op) override; - void visit(const Broadcast *) override; - - void visit(const Evaluate *) override; - void visit(const Atomic *) override; - -private: - std::string get_vector_suffix(const Expr &e); - - std::vector print_lanes(const Expr &expr); - - Scope scalar_vars, vector_vars; -}; - -} // namespace Internal -} // namespace Halide - -#endif