diff --git a/Makefile b/Makefile
index 921f8e388e3b..4b2ca0e8d54b 100644
--- a/Makefile
+++ b/Makefile
@@ -424,7 +424,6 @@ SOURCE_FILES = \
   CodeGen_Metal_Dev.cpp \
   CodeGen_MIPS.cpp \
   CodeGen_OpenCL_Dev.cpp \
-  CodeGen_OpenGL_Dev.cpp \
   CodeGen_OpenGLCompute_Dev.cpp \
   CodeGen_Posix.cpp \
   CodeGen_PowerPC.cpp \
@@ -595,7 +594,6 @@ HEADER_FILES = \
   CodeGen_Metal_Dev.h \
   CodeGen_MIPS.h \
   CodeGen_OpenCL_Dev.h \
-  CodeGen_OpenGL_Dev.h \
   CodeGen_OpenGLCompute_Dev.h \
   CodeGen_Posix.h \
   CodeGen_PowerPC.h \
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 0b45adf43715..8a826a02e835 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -33,7 +33,6 @@ set(HEADER_FILES
     CodeGen_Metal_Dev.h
     CodeGen_MIPS.h
     CodeGen_OpenCL_Dev.h
-    CodeGen_OpenGL_Dev.h
     CodeGen_OpenGLCompute_Dev.h
     CodeGen_Posix.h
     CodeGen_PowerPC.h
@@ -197,7 +196,6 @@ set(SOURCE_FILES
     CodeGen_Metal_Dev.cpp
     CodeGen_MIPS.cpp
     CodeGen_OpenCL_Dev.cpp
-    CodeGen_OpenGL_Dev.cpp
     CodeGen_OpenGLCompute_Dev.cpp
     CodeGen_Posix.cpp
     CodeGen_PowerPC.cpp
diff --git a/src/CodeGen_GPU_Host.cpp b/src/CodeGen_GPU_Host.cpp
index 7f3d3a46fb68..a151a4f2ca14 100644
--- a/src/CodeGen_GPU_Host.cpp
+++ b/src/CodeGen_GPU_Host.cpp
@@ -8,7 +8,6 @@
 #include "CodeGen_Metal_Dev.h"
 #include "CodeGen_OpenCL_Dev.h"
 #include "CodeGen_OpenGLCompute_Dev.h"
-#include "CodeGen_OpenGL_Dev.h"
 #include "CodeGen_PTX_Dev.h"
 #include "CodeGen_PowerPC.h"
 #include "CodeGen_RISCV.h"
diff --git a/src/CodeGen_OpenGLCompute_Dev.cpp b/src/CodeGen_OpenGLCompute_Dev.cpp
index 1be9045bb1d1..be2c3afd2cc2 100644
--- a/src/CodeGen_OpenGLCompute_Dev.cpp
+++ b/src/CodeGen_OpenGLCompute_Dev.cpp
@@ -1,6 +1,7 @@
 #include "CodeGen_OpenGLCompute_Dev.h"
+#include "CSE.h"
+#include "CodeGen_C.h"
 #include "CodeGen_GPU_Dev.h"
-#include "CodeGen_OpenGL_Dev.h"
 #include "Debug.h"
 #include "Deinterleave.h"
 #include "IRMatch.h"
@@ -20,6 +21,484 @@ using std::vector;
 
 namespace {
 
+char get_lane_suffix(int i) {
+    internal_assert(i >= 0 && i < 4);
+    return "rgba"[i];  // lane 0 -> 'r', 1 -> 'g', 2 -> 'b', 3 -> 'a'
+}
+
+/**
+  * CodeGen_C specialization that prints Halide IR as GLSL (types, immediates, casts, calls).
+  * TODO: combine this with CodeGen_OpenGLCompute_C, which is now the only subclass
+  * (unless it ends up being useful for Vulkan in the future?)
+  */
+class CodeGen_GLSLBase : public CodeGen_C {
+public:
+    CodeGen_GLSLBase(std::ostream &s, Target t);
+
+    std::string print_name(const std::string &name) override;  // CodeGen_C mangling with "__" replaced by "XX"
+    std::string print_type(Type type, AppendSpaceIfNeeded space_option = DoNotAppendSpace) override;  // GLSL spelling of map_type(type)
+
+protected:
+    using CodeGen_C::visit;  // keep the CodeGen_C overloads that are not overridden here visible
+
+    void visit(const Cast *) override;
+
+    void visit(const FloatImm *) override;
+    void visit(const UIntImm *) override;
+    void visit(const IntImm *) override;
+
+    void visit(const Max *op) override;
+    void visit(const Min *op) override;
+    void visit(const Call *op) override;
+
+    void visit(const Mod *) override;
+
+    // vector comparisons are emitted as calls to the
+    // GLSL builtins (equal, lessThan, ...) instead of operators
+    void visit(const EQ *) override;
+    void visit(const NE *) override;
+    void visit(const LT *) override;
+    void visit(const LE *) override;
+    void visit(const GT *) override;
+    void visit(const GE *) override;
+
+    void visit(const Shuffle *) override;
+
+    Type map_type(const Type &);  // Halide type -> the GLSL type used to represent it
+
+    std::map<std::string, std::string> builtin;  // extern function name -> GLSL builtin name, filled by the constructor
+
+    // empty for GL 3.x and GLCompute which do not care about this (due to implicit conversion)
+    // while GL 2.0 only supports a small subset of builtin functions with ivec arguments
+    std::set<std::string> support_non_float_type_builtin;
+
+    // true for GL 3.x (GLSL >= 130 or ESSL >= 300) and GLCompute
+    // false for GL 2.x which does not support uint/uvec
+    bool support_native_uint = true;
+
+    // true for GL 2.1 and 3.x (GLSL == 120, >= 130) and GLCompute
+    // true for GL ES 3.1 with EXT_shader_implicit_conversions
+    // false for GL 2.0 and GL ES 3.0
+    bool support_int_to_float_implicit_conversion = true;
+
+    // it seems that only GLSL ES implicitly does not support rounding of integer division
+    // while GLSL specification does not talk about this issue
+    // see GLSL ES Specification 1.00, issues 10.28, Rounding of Integer Division
+    // see GLSL ES Specification 3.00, issues 12.33, Rounding of Integer Division
+    bool support_integer_division_rounding = true;
+};
+
+CodeGen_GLSLBase::CodeGen_GLSLBase(std::ostream &s, Target target)
+    : CodeGen_C(s, target) {
+    builtin["sin_f32"] = "sin";  // key: name carried by the Call node, value: GLSL builtin spelling
+    builtin["sqrt_f32"] = "sqrt";
+    builtin["cos_f32"] = "cos";
+    builtin["exp_f32"] = "exp";
+    builtin["log_f32"] = "log";
+    builtin["abs_f32"] = "abs";
+    builtin["floor_f32"] = "floor";
+    builtin["ceil_f32"] = "ceil";
+    builtin["asin_f32"] = "asin";
+    builtin["acos_f32"] = "acos";
+    builtin["tan_f32"] = "tan";
+    builtin["atan_f32"] = "atan";
+    builtin["atan2_f32"] = "atan";  // also called atan in GLSL
+    builtin["sinh_f32"] = "sinh";
+    builtin["cosh_f32"] = "cosh";
+    builtin["tanh_f32"] = "tanh";
+    builtin["asinh_f32"] = "asinh";
+    builtin["acosh_f32"] = "acosh";
+    builtin["atanh_f32"] = "atanh";
+    builtin["min"] = "min";  // "min", "max", "mix" and "mod" are the names the Min/Max/lerp/Mod visitors emit
+    builtin["max"] = "max";
+    builtin["mix"] = "mix";
+    builtin["mod"] = "mod";
+    builtin["abs"] = "abs";
+    builtin["isnan"] = "isnan";
+    builtin["round_f32"] = "roundEven";
+    builtin["fast_inverse_sqrt_f32"] = "inversesqrt";  // NOTE(review): visit(const Call *) expands this name to 1 / sqrt(x) before builtin is consulted
+
+    // vector comparison functions (they produce bvecs); emitted by the EQ/NE/LT/LE/GT/GE visitors
+    builtin["equal"] = "equal";
+    builtin["notEqual"] = "notEqual";
+    builtin["lessThan"] = "lessThan";
+    builtin["lessThanEqual"] = "lessThanEqual";
+    builtin["greaterThan"] = "greaterThan";
+    builtin["greaterThanEqual"] = "greaterThanEqual";
+}
+
+// Maps a Halide type to the GLSL type used to represent it (vectors are mapped lane-wise),
+// or raises a user error if no equivalent type is available.
+Type CodeGen_GLSLBase::map_type(const Type &type) {
+    Type result = type;
+    if (type.is_scalar()) {
+        if (type.is_float()) {
+            user_assert(type.bits() <= 32)
+                << "GLSL: Can't represent a float with " << type.bits() << " bits.\n";
+            result = Float(32);
+        } else if (type.is_bool()) {
+            // bool is kept as-is
+        } else if (type.is_int() && type.bits() <= 32) {
+            result = Int(32);  // every signed integer of up to 32 bits becomes a 32-bit int
+        } else if (type.is_uint() && type.bits() <= 32) {
+            if (support_native_uint) {
+                result = UInt(32);
+            } else {
+                if (type.bits() == 32) {
+                    // GLSL <= 120 doesn't have unsigned types, simply use int.
+                    // WARNING: Using int to represent unsigned int may result in
+                    // overflows and undefined behavior.
+                    result = Int(32);
+                } else {
+                    // Embed all other uints in a GLSL float. Probably not actually
+                    // valid for uint16 on systems with low float precision.
+                    result = Float(32);
+                }
+            }
+        } else {
+            user_error << "GLSL: Can't represent type '" << type << "'.\n";  // reached by integers wider than 32 bits and by non-numeric types
+        }
+    } else {
+        user_assert(type.lanes() <= 4)
+            << "GLSL: vector types wider than 4 aren't supported\n";
+        user_assert(type.is_bool() || type.is_int() || type.is_uint() || type.is_float())
+            << "GLSL: Can't represent vector type '" << type << "'.\n";
+        Type scalar_type = type.element_of();
+        result = map_type(scalar_type).with_lanes(type.lanes());  // map the element type, then restore the lane count
+    }
+    return result;
+}
+
+void CodeGen_GLSLBase::visit(const FloatImm *op) {
+    ostringstream oss;
+    // Print integral numbers with trailing ".0". For fractional numbers use a
+    // precision of 9 digits, which should be enough to recover the binary
+    // float unambiguously from the decimal representation (if iostreams
+    // implements correct rounding).
+    const float truncated = (op->value < 0 ? std::ceil(op->value) : std::floor(op->value));  // value rounded toward zero
+    if (truncated == op->value) {
+        oss << std::fixed << std::setprecision(1) << op->value;
+    } else {
+        oss << std::setprecision(9) << op->value;
+    }
+    id = oss.str();  // the literal text itself becomes the expression id
+}
+
+void CodeGen_GLSLBase::visit(const IntImm *op) {
+    id = print_type(op->type) + "(" + std::to_string(op->value) + ")";  // constructor-style literal in the mapped GLSL type, e.g. "int(5)"
+}
+
+void CodeGen_GLSLBase::visit(const UIntImm *op) {
+    if (op->type == Bool()) {  // boolean constants arrive as UIntImm nodes of type Bool()
+        if (op->value == 1) {
+            id = "true";
+        } else {
+            id = "false";
+        }
+    } else if (support_native_uint) {
+        id = std::to_string(op->value) + "u";  // unsigned literal with "u" suffix, e.g. "7u"
+    } else {
+        id = print_type(op->type) + "(" + std::to_string(op->value) + ")";  // no native uint: constructor-style literal in the mapped type
+    }
+}
+
+void CodeGen_GLSLBase::visit(const Max *op) {
+    print_expr(Call::make(op->type, "max", {op->a, op->b}, Call::PureExtern));  // re-dispatch as a call to "max"; visit(const Call *) resolves it through builtin
+}
+
+void CodeGen_GLSLBase::visit(const Min *op) {
+    print_expr(Call::make(op->type, "min", {op->a, op->b}, Call::PureExtern));  // re-dispatch as a call to "min"; visit(const Call *) resolves it through builtin
+}
+
+void CodeGen_GLSLBase::visit(const Mod *op) {
+    if (op->type.is_int() || op->type.is_uint()) {
+        // Just exploit the Euclidean identity
+        // FIXME: Why doesn't lower_euclidean_mod work for glsl?
+        // https://github.com/halide/Halide/issues/4979
+        Expr zero = make_zero(op->type);
+        Expr equiv = select(op->a == zero, zero,
+                            op->a - (op->a / op->b) * op->b);
+        equiv = common_subexpression_elimination(equiv);  // op->a and op->b each occur more than once in equiv
+        print_expr(equiv);
+    } else {
+        print_expr(Call::make(op->type, "mod", {op->a, op->b}, Call::Extern));  // other types go through the "mod" entry of builtin
+    }
+}
+
+void CodeGen_GLSLBase::visit(const Call *op) {
+    if (op->is_intrinsic(Call::lerp)) {
+        // Implement lerp using GLSL's mix() function, which always uses
+        // floating point arithmetic.
+        Expr zero_val = op->args[0];
+        Expr one_val = op->args[1];
+        Expr weight = op->args[2];
+
+        internal_assert(weight.type().is_uint() || weight.type().is_float());
+        if (weight.type().is_uint()) {
+            // Normalize integer weights to [0.0f, 1.0f] range.
+            internal_assert(weight.type().bits() < 32);
+            weight = Div::make(Cast::make(Float(32), weight),
+                               Cast::make(Float(32), weight.type().max()));
+        } else if (op->type.is_uint()) {
+            // Round float weights down to next multiple of (1/op->type.max())
+            // to give same results as lerp based on integer arithmetic.
+            internal_assert(op->type.bits() < 32);
+            weight = floor(weight * op->type.max()) / op->type.max();
+        }
+
+        Type result_type = Float(32, op->type.lanes());
+        Expr e = Call::make(result_type, "mix", {zero_val, one_val, weight}, Call::Extern);  // "mix" is resolved through builtin when e is printed
+
+        if (!op->type.is_float()) {
+            // Mirror rounding implementation of Halide's integer lerp.
+            e = Cast::make(op->type, floor(e + 0.5f));
+        }
+        print_expr(e);
+        return;
+    } else if (op->is_intrinsic(Call::absd)) {
+        internal_assert(op->args.size() == 2);
+        Expr a = op->args[0];
+        Expr b = op->args[1];
+        Expr e = cast(op->type, select(a < b, b - a, a - b));  // subtract the smaller operand from the larger one
+        print_expr(e);
+        return;
+    } else if (op->is_intrinsic(Call::return_second)) {
+        internal_assert(op->args.size() == 2);
+        // Simply discard the first argument, which is generally a call to
+        // 'halide_printf'.
+        print_assignment(op->type, print_expr(op->args[1]));
+        return;
+    } else if (op->name == "fast_inverse_f32") {
+        print_expr(make_one(op->type) / op->args[0]);
+        return;
+    } else if (op->name == "fast_inverse_sqrt_f32") {
+        print_expr(make_one(op->type) / sqrt(op->args[0]));
+        return;
+    } else if (op->name == "pow_f32") {
+        if (can_prove(op->args[0] > 0)) {
+            ostringstream rhs;
+            rhs << "pow(" << print_expr(op->args[0]) << ", " << print_expr(op->args[1]) << ")";
+            print_assignment(op->type, rhs.str());
+            return;
+        } else {
+            ostringstream base;
+            string a = print_expr(op->args[0]);
+            string b = print_expr(op->args[1]);
+            base << "pow(abs(" << a << "), " << b << ")";
+            string c = print_assignment(op->type, base.str());
+            Expr a_var = is_const(op->args[0]) ? op->args[0] : Variable::make(op->type, a);  // refer to the already printed operand by its id
+            Expr b_var = is_const(op->args[1]) ? op->args[1] : Variable::make(op->type, b);
+            Expr c_var = Variable::make(op->type, c);
+            // OpenGL isn't required to produce NaNs, so we return
+            // zero in the undefined case.
+            Expr equiv = select(a_var > 0 || b_var % 2 == 0, c_var,
+                                b_var % 2 == 1, -c_var,
+                                0.0f);
+            print_expr(simplify(equiv));
+            return;
+        }
+    } else if (op->is_intrinsic(Call::shift_right)) {
+        print_assignment(op->type, print_expr(op->args[0]) + " >> " + print_expr(op->args[1]));
+    } else if (op->is_intrinsic(Call::shift_left)) {
+        print_assignment(op->type, print_expr(op->args[0]) + " << " + print_expr(op->args[1]));
+    } else if (op->is_intrinsic(Call::bitwise_not)) {
+        print_assignment(op->type, "~" + print_expr(op->args[0]));
+    } else if (op->is_intrinsic(Call::bitwise_and)) {
+        print_assignment(op->type, print_expr(op->args[0]) + " & " + print_expr(op->args[1]));
+    } else if (op->is_intrinsic(Call::bitwise_or)) {
+        print_assignment(op->type, print_expr(op->args[0]) + " | " + print_expr(op->args[1]));
+    } else if (op->is_intrinsic(Call::bitwise_xor)) {
+        print_assignment(op->type, print_expr(op->args[0]) + " ^ " + print_expr(op->args[1]));
+    } else if (op->is_intrinsic(Call::div_round_to_zero)) {
+        print_assignment(op->type, print_expr(op->args[0]) + " / " + print_expr(op->args[1]));
+    } else if (op->is_intrinsic(Call::mod_round_to_zero)) {
+        print_assignment(op->type, print_expr(op->args[0]) + " % " + print_expr(op->args[1]));
+    } else {
+        ostringstream rhs;
+        if (builtin.count(op->name) == 0) {
+            user_error << "GLSL: unknown function '" << op->name << "' encountered.\n";
+        }
+
+        bool need_cast = false;
+        const Type float_type = Float(32, op->type.lanes());
+        vector<Expr> new_args(op->args.size());
+
+        // For GL 2.0, most GLSL builtins are only defined for float arguments,
+        // so we may have to introduce type casts around the arguments and the
+        // entire function call.
+        if (!support_int_to_float_implicit_conversion &&
+            !support_non_float_type_builtin.count(op->name)) {
+            need_cast = !op->type.is_float();
+            for (size_t i = 0; i < op->args.size(); i++) {
+                if (!op->args[i].type().is_float()) {
+                    new_args[i] = Cast::make(float_type, op->args[i]);
+                    need_cast = true;
+                } else {
+                    new_args[i] = op->args[i];
+                }
+            }
+        }
+
+        if (need_cast) {
+            Expr val = Call::make(float_type, op->name, new_args, op->call_type);  // same call on float arguments, printed by re-entering this visitor
+            print_expr(simplify(Cast::make(op->type, val)));
+        } else {
+            rhs << builtin[op->name] << "(";
+            for (size_t i = 0; i < op->args.size(); i++) {
+                if (i > 0) {
+                    rhs << ", ";
+                }
+                rhs << print_expr(op->args[i]);
+            }
+            rhs << ")";
+            print_assignment(op->type, rhs.str());
+        }
+    }
+}
+
+string CodeGen_GLSLBase::print_type(Type type, AppendSpaceIfNeeded space_option) {
+    ostringstream oss;
+    type = map_type(type);  // spell the GLSL representation, not the Halide type itself
+    if (type.is_scalar()) {
+        if (type.is_float()) {
+            oss << "float";
+        } else if (type.is_bool()) {
+            oss << "bool";
+        } else if (type.is_int()) {
+            oss << "int";
+        } else if (type.is_uint()) {
+            oss << "uint";
+        } else {
+            internal_error << "GLSL: invalid type '" << type << "' encountered.\n";
+        }
+    } else {
+        if (type.is_float()) {
+            // no prefix for float vectors
+        } else if (type.is_bool()) {
+            oss << "b";
+        } else if (type.is_int()) {
+            oss << "i";
+        } else if (type.is_uint()) {
+            oss << "u";
+        } else {
+            internal_error << "GLSL: invalid type '" << type << "' encountered.\n";
+        }
+        oss << "vec" << type.lanes();  // yields e.g. "vec4", "ivec2", "uvec3", "bvec2"
+    }
+
+    if (space_option == AppendSpace) {
+        oss << " ";
+    }
+
+    return oss.str();
+}
+
+// Vector comparisons are emitted as calls to GLSL's comparison builtins (equal, notEqual,
+// lessThan, ...), looked up in the builtin table; scalar comparisons are left to CodeGen_C.
+void CodeGen_GLSLBase::visit(const EQ *op) {
+    if (op->type.is_vector()) {
+        print_expr(Call::make(op->type, "equal", {op->a, op->b}, Call::Extern));
+    } else {
+        CodeGen_C::visit(op);  // scalar case: defer to the base class
+    }
+}
+
+void CodeGen_GLSLBase::visit(const NE *op) {
+    if (op->type.is_vector()) {
+        print_expr(Call::make(op->type, "notEqual", {op->a, op->b}, Call::Extern));  // vector case: call the "notEqual" builtin
+    } else {
+        CodeGen_C::visit(op);  // scalar case: defer to the base class
+    }
+}
+
+void CodeGen_GLSLBase::visit(const LT *op) {
+    if (op->type.is_vector()) {
+        print_expr(Call::make(op->type, "lessThan", {op->a, op->b}, Call::Extern));  // vector case: call the "lessThan" builtin
+    } else {
+        CodeGen_C::visit(op);  // scalar case: defer to the base class
+    }
+}
+
+void CodeGen_GLSLBase::visit(const LE *op) {
+    if (op->type.is_vector()) {
+        print_expr(Call::make(op->type, "lessThanEqual", {op->a, op->b}, Call::Extern));  // vector case: call the "lessThanEqual" builtin
+    } else {
+        CodeGen_C::visit(op);  // scalar case: defer to the base class
+    }
+}
+
+void CodeGen_GLSLBase::visit(const GT *op) {
+    if (op->type.is_vector()) {
+        print_expr(Call::make(op->type, "greaterThan", {op->a, op->b}, Call::Extern));  // vector case: call the "greaterThan" builtin
+    } else {
+        CodeGen_C::visit(op);  // scalar case: defer to the base class
+    }
+}
+
+void CodeGen_GLSLBase::visit(const GE *op) {
+    if (op->type.is_vector()) {
+        print_expr(Call::make(op->type, "greaterThanEqual", {op->a, op->b}, Call::Extern));  // vector case: call the "greaterThanEqual" builtin
+    } else {
+        CodeGen_C::visit(op);  // scalar case: defer to the base class
+    }
+}
+
+void CodeGen_GLSLBase::visit(const Shuffle *op) {
+    // The Halide Shuffle represents the llvm intrinsic
+    // shufflevector, however, for GLSL its use is limited to swizzling
+    // up to a four channel vec type.
+
+    internal_assert(op->vectors.size() == 1);  // only single-source shuffles can be written as a swizzle
+
+    int shuffle_lanes = op->type.lanes();
+    internal_assert(shuffle_lanes <= 4);
+
+    string expr = print_expr(op->vectors[0]);
+
+    // Create a swizzle expression for the shuffle
+    string swizzle;
+    for (int i = 0; i != shuffle_lanes; ++i) {
+        int channel = op->indices[i];
+        internal_assert(channel < 4) << "Shuffle of invalid channel";
+        swizzle += get_lane_suffix(channel);  // get_lane_suffix also asserts channel >= 0
+    }
+
+    print_assignment(op->type, expr + "." + swizzle);  // e.g. "<expr>.rgb" for indices {0, 1, 2}
+}
+
+// Identifiers containing double underscores '__' are reserved in GLSL, so we
+// have to use a different name mangling scheme than in the C code generator.
+string CodeGen_GLSLBase::print_name(const string &name) {
+    const string mangled = CodeGen_C::print_name(name);  // start from the C code generator's mangling
+    return replace_all(mangled, "__", "XX");  // then rewrite every "__" as "XX"
+}
+
+void CodeGen_GLSLBase::visit(const Cast *op) {
+    Type value_type = op->value.type();
+    // If both types are represented by the same GLSL type, no explicit cast
+    // is necessary.
+    if (map_type(op->type) == map_type(value_type)) {
+        Expr value = op->value;
+        if (value_type.code() == Type::Float) {
+            // float->int conversions may need explicit truncation if an
+            // integer type is embedded into a float. (Note: overflows are
+            // considered undefined behavior, so we do nothing about values
+            // that are out of range of the target type.)
+            if (op->type.code() == Type::UInt) {
+                value = simplify(floor(value));
+            } else if (op->type.code() == Type::Int) {
+                value = simplify(trunc(value));
+            }
+        }
+        // FIXME: Overflow is not UB for most Halide types
+        // https://github.com/halide/Halide/issues/4975
+        value.accept(this);  // print the (possibly floored/truncated) value with no GLSL conversion around it
+    } else {
+        Type target_type = map_type(op->type);
+        print_assignment(target_type, print_type(target_type) + "(" + print_expr(op->value) + ")");  // constructor-style conversion, e.g. "float(<value>)"
+    }
+}
+
 class CodeGen_OpenGLCompute_Dev : public CodeGen_GPU_Dev {
 public:
     CodeGen_OpenGLCompute_Dev(const Target &target);
diff --git a/src/CodeGen_OpenGL_Dev.cpp b/src/CodeGen_OpenGL_Dev.cpp
deleted file mode 100644
index 2081fd37b75c..000000000000
--- a/src/CodeGen_OpenGL_Dev.cpp
+++ /dev/null
@@ -1,1148 +0,0 @@
-#include "CodeGen_OpenGL_Dev.h"
-#include "CSE.h"
-#include "Debug.h"
-#include "Deinterleave.h"
-#include "IRMatch.h"
-#include "IRMutator.h"
-#include "IROperator.h"
-#include "Simplify.h"
-#include <iomanip>
-#include <limits>
-#include <map>
-
-namespace Halide {
-namespace Internal {
-
-using std::ostringstream;
-using std::string;
-using std::vector;
-
-namespace {
-
-bool is_opengl_es(const Target &target) {
-    // TODO: we need a better way to switch between the different OpenGL
-    // versions (desktop GL, GLES2, GLES3, ...), probably by making it part of
-    // Target.
-    return (target.os == Target::Android ||
-            target.os == Target::IOS) ||
-           target.has_feature(Target::EGL);
-}
-
-char get_lane_suffix(int i) {
-    internal_assert(i >= 0 && i < 4);
-    return "rgba"[i];
-}
-
-}  // namespace
-
-CodeGen_OpenGL_Dev::CodeGen_OpenGL_Dev(const Target &target)
-    : target(target) {
-    debug(1) << "Creating GLSL codegen\n";
-    glc = new CodeGen_GLSL(src_stream, target);
-}
-
-CodeGen_OpenGL_Dev::~CodeGen_OpenGL_Dev() {
-    delete glc;
-}
-
-void CodeGen_OpenGL_Dev::add_kernel(Stmt s, const string &name,
-                                    const vector<DeviceArgument> &args) {
-    cur_kernel_name = name;
-    glc->add_kernel(s, name, args);
-}
-
-void CodeGen_OpenGL_Dev::init_module() {
-    src_stream.str("");
-    src_stream.clear();
-    cur_kernel_name = "";
-}
-
-vector<char> CodeGen_OpenGL_Dev::compile_to_src() {
-    string str = src_stream.str();
-    debug(1) << "GLSL source:\n"
-             << str << "\n";
-    vector<char> buffer(str.begin(), str.end());
-    buffer.push_back(0);
-    return buffer;
-}
-
-string CodeGen_OpenGL_Dev::get_current_kernel_name() {
-    return cur_kernel_name;
-}
-
-void CodeGen_OpenGL_Dev::dump() {
-    std::cerr << src_stream.str() << "\n";
-}
-
-string CodeGen_OpenGL_Dev::print_gpu_name(const string &name) {
-    return glc->print_name(name);
-}
-
-//
-// CodeGen_GLSLBase
-//
-CodeGen_GLSLBase::CodeGen_GLSLBase(std::ostream &s, Target target)
-    : CodeGen_C(s, target) {
-    builtin["sin_f32"] = "sin";
-    builtin["sqrt_f32"] = "sqrt";
-    builtin["cos_f32"] = "cos";
-    builtin["exp_f32"] = "exp";
-    builtin["log_f32"] = "log";
-    builtin["abs_f32"] = "abs";
-    builtin["floor_f32"] = "floor";
-    builtin["ceil_f32"] = "ceil";
-    builtin["asin_f32"] = "asin";
-    builtin["acos_f32"] = "acos";
-    builtin["tan_f32"] = "tan";
-    builtin["atan_f32"] = "atan";
-    builtin["atan2_f32"] = "atan";  // also called atan in GLSL
-    builtin["sinh_f32"] = "sinh";
-    builtin["cosh_f32"] = "cosh";
-    builtin["tanh_f32"] = "tanh";
-    builtin["asinh_f32"] = "asinh";
-    builtin["acosh_f32"] = "acosh";
-    builtin["atanh_f32"] = "atanh";
-    builtin["min"] = "min";
-    builtin["max"] = "max";
-    builtin["mix"] = "mix";
-    builtin["mod"] = "mod";
-    builtin["abs"] = "abs";
-    builtin["isnan"] = "isnan";
-    builtin["round_f32"] = "roundEven";
-    builtin["fast_inverse_sqrt_f32"] = "inversesqrt";
-
-    // functions that produce bvecs
-    builtin["equal"] = "equal";
-    builtin["notEqual"] = "notEqual";
-    builtin["lessThan"] = "lessThan";
-    builtin["lessThanEqual"] = "lessThanEqual";
-    builtin["greaterThan"] = "greaterThan";
-    builtin["greaterThanEqual"] = "greaterThanEqual";
-}
-
-// Maps Halide types to appropriate GLSL types or emit error if no equivalent
-// type is available.
-Type CodeGen_GLSLBase::map_type(const Type &type) {
-    Type result = type;
-    if (type.is_scalar()) {
-        if (type.is_float()) {
-            user_assert(type.bits() <= 32)
-                << "GLSL: Can't represent a float with " << type.bits() << " bits.\n";
-            result = Float(32);
-        } else if (type.is_bool()) {
-            // unchanged
-        } else if (type.is_int() && type.bits() <= 32) {
-            result = Int(32);
-        } else if (type.is_uint() && type.bits() <= 32) {
-            if (support_native_uint) {
-                result = UInt(32);
-            } else {
-                if (type.bits() == 32) {
-                    // GLSL <= 120 doesn't have unsigned types, simply use int.
-                    // WARNING: Using int to represent unsigned int may result in
-                    // overflows and undefined behavior.
-                    result = Int(32);
-                } else {
-                    // Embed all other uints in a GLSL float. Probably not actually
-                    // valid for uint16 on systems with low float precision.
-                    result = Float(32);
-                }
-            }
-        } else {
-            user_error << "GLSL: Can't represent type '" << type << "'.\n";
-        }
-    } else {
-        user_assert(type.lanes() <= 4)
-            << "GLSL: vector types wider than 4 aren't supported\n";
-        user_assert(type.is_bool() || type.is_int() || type.is_uint() || type.is_float())
-            << "GLSL: Can't represent vector type '" << type << "'.\n";
-        Type scalar_type = type.element_of();
-        result = map_type(scalar_type).with_lanes(type.lanes());
-    }
-    return result;
-}
-
-void CodeGen_GLSLBase::visit(const FloatImm *op) {
-    ostringstream oss;
-    // Print integral numbers with trailing ".0". For fractional numbers use a
-    // precision of 9 digits, which should be enough to recover the binary
-    // float unambiguously from the decimal representation (if iostreams
-    // implements correct rounding).
-    const float truncated = (op->value < 0 ? std::ceil(op->value) : std::floor(op->value));
-    if (truncated == op->value) {
-        oss << std::fixed << std::setprecision(1) << op->value;
-    } else {
-        oss << std::setprecision(9) << op->value;
-    }
-    id = oss.str();
-}
-
-void CodeGen_GLSLBase::visit(const IntImm *op) {
-    id = print_type(op->type) + "(" + std::to_string(op->value) + ")";
-}
-
-void CodeGen_GLSLBase::visit(const UIntImm *op) {
-    if (op->type == Bool()) {
-        if (op->value == 1) {
-            id = "true";
-        } else {
-            id = "false";
-        }
-    } else if (support_native_uint) {
-        id = std::to_string(op->value) + "u";
-    } else {
-        id = print_type(op->type) + "(" + std::to_string(op->value) + ")";
-    }
-}
-
-void CodeGen_GLSLBase::visit(const Max *op) {
-    print_expr(Call::make(op->type, "max", {op->a, op->b}, Call::PureExtern));
-}
-
-void CodeGen_GLSLBase::visit(const Min *op) {
-    print_expr(Call::make(op->type, "min", {op->a, op->b}, Call::PureExtern));
-}
-
-void CodeGen_GLSLBase::visit(const Mod *op) {
-    if (op->type.is_int() || op->type.is_uint()) {
-        // Just exploit the Euclidean identity
-        // FIXME: Why doesn't lower_euclidean_mod work for glsl?
-        // https://github.com/halide/Halide/issues/4979
-        Expr zero = make_zero(op->type);
-        Expr equiv = select(op->a == zero, zero,
-                            op->a - (op->a / op->b) * op->b);
-        equiv = common_subexpression_elimination(equiv);
-        print_expr(equiv);
-    } else {
-        print_expr(Call::make(op->type, "mod", {op->a, op->b}, Call::Extern));
-    }
-}
-
-void CodeGen_GLSLBase::visit(const Call *op) {
-    if (op->is_intrinsic(Call::lerp)) {
-        // Implement lerp using GLSL's mix() function, which always uses
-        // floating point arithmetic.
-        Expr zero_val = op->args[0];
-        Expr one_val = op->args[1];
-        Expr weight = op->args[2];
-
-        internal_assert(weight.type().is_uint() || weight.type().is_float());
-        if (weight.type().is_uint()) {
-            // Normalize integer weights to [0.0f, 1.0f] range.
-            internal_assert(weight.type().bits() < 32);
-            weight = Div::make(Cast::make(Float(32), weight),
-                               Cast::make(Float(32), weight.type().max()));
-        } else if (op->type.is_uint()) {
-            // Round float weights down to next multiple of (1/op->type.imax())
-            // to give same results as lerp based on integer arithmetic.
-            internal_assert(op->type.bits() < 32);
-            weight = floor(weight * op->type.max()) / op->type.max();
-        }
-
-        Type result_type = Float(32, op->type.lanes());
-        Expr e = Call::make(result_type, "mix", {zero_val, one_val, weight}, Call::Extern);
-
-        if (!op->type.is_float()) {
-            // Mirror rounding implementation of Halide's integer lerp.
-            e = Cast::make(op->type, floor(e + 0.5f));
-        }
-        print_expr(e);
-        return;
-    } else if (op->is_intrinsic(Call::absd)) {
-        internal_assert(op->args.size() == 2);
-        Expr a = op->args[0];
-        Expr b = op->args[1];
-        Expr e = cast(op->type, select(a < b, b - a, a - b));
-        print_expr(e);
-        return;
-    } else if (op->is_intrinsic(Call::return_second)) {
-        internal_assert(op->args.size() == 2);
-        // Simply discard the first argument, which is generally a call to
-        // 'halide_printf'.
-        print_assignment(op->type, print_expr(op->args[1]));
-        return;
-    } else if (op->name == "fast_inverse_f32") {
-        print_expr(make_one(op->type) / op->args[0]);
-        return;
-    } else if (op->name == "fast_inverse_sqrt_f32") {
-        print_expr(make_one(op->type) / sqrt(op->args[0]));
-        return;
-    } else if (op->name == "pow_f32") {
-        if (can_prove(op->args[0] > 0)) {
-            ostringstream rhs;
-            rhs << "pow(" << print_expr(op->args[0]) << ", " << print_expr(op->args[1]) << ")";
-            print_assignment(op->type, rhs.str());
-            return;
-        } else {
-            ostringstream base;
-            string a = print_expr(op->args[0]);
-            string b = print_expr(op->args[1]);
-            base << "pow(abs(" << a << "), " << b << ")";
-            string c = print_assignment(op->type, base.str());
-            Expr a_var = is_const(op->args[0]) ? op->args[0] : Variable::make(op->type, a);
-            Expr b_var = is_const(op->args[1]) ? op->args[1] : Variable::make(op->type, b);
-            Expr c_var = Variable::make(op->type, c);
-            // OpenGL isn't required to produce NaNs, so we return
-            // zero in the undefined case.
-            Expr equiv = select(a_var > 0 || b_var % 2 == 0, c_var,
-                                b_var % 2 == 1, -c_var,
-                                0.0f);
-            print_expr(simplify(equiv));
-            return;
-        }
-    } else if (op->is_intrinsic(Call::shift_right)) {
-        print_assignment(op->type, print_expr(op->args[0]) + " >> " + print_expr(op->args[1]));
-    } else if (op->is_intrinsic(Call::shift_left)) {
-        print_assignment(op->type, print_expr(op->args[0]) + " << " + print_expr(op->args[1]));
-    } else if (op->is_intrinsic(Call::bitwise_not)) {
-        print_assignment(op->type, "~" + print_expr(op->args[0]));
-    } else if (op->is_intrinsic(Call::bitwise_and)) {
-        print_assignment(op->type, print_expr(op->args[0]) + " & " + print_expr(op->args[1]));
-    } else if (op->is_intrinsic(Call::bitwise_or)) {
-        print_assignment(op->type, print_expr(op->args[0]) + " | " + print_expr(op->args[1]));
-    } else if (op->is_intrinsic(Call::bitwise_xor)) {
-        print_assignment(op->type, print_expr(op->args[0]) + " ^ " + print_expr(op->args[1]));
-    } else if (op->is_intrinsic(Call::div_round_to_zero)) {
-        print_assignment(op->type, print_expr(op->args[0]) + " / " + print_expr(op->args[1]));
-    } else if (op->is_intrinsic(Call::mod_round_to_zero)) {
-        print_assignment(op->type, print_expr(op->args[0]) + " % " + print_expr(op->args[1]));
-    } else {
-        ostringstream rhs;
-        if (builtin.count(op->name) == 0) {
-            user_error << "GLSL: unknown function '" << op->name << "' encountered.\n";
-        }
-
-        bool need_cast = false;
-        const Type float_type = Float(32, op->type.lanes());
-        vector<Expr> new_args(op->args.size());
-
-        // For GL 2.0, Most GLSL builtins are only defined for float arguments,
-        // so we may have to introduce type casts around the arguments and the
-        // entire function call.
-        if (!support_int_to_float_implicit_conversion &&
-            !support_non_float_type_builtin.count(op->name)) {
-            need_cast = !op->type.is_float();
-            for (size_t i = 0; i < op->args.size(); i++) {
-                if (!op->args[i].type().is_float()) {
-                    new_args[i] = Cast::make(float_type, op->args[i]);
-                    need_cast = true;
-                } else {
-                    new_args[i] = op->args[i];
-                }
-            }
-        }
-
-        if (need_cast) {
-            Expr val = Call::make(float_type, op->name, new_args, op->call_type);
-            print_expr(simplify(Cast::make(op->type, val)));
-        } else {
-            rhs << builtin[op->name] << "(";
-            for (size_t i = 0; i < op->args.size(); i++) {
-                if (i > 0) {
-                    rhs << ", ";
-                }
-                rhs << print_expr(op->args[i]);
-            }
-            rhs << ")";
-            print_assignment(op->type, rhs.str());
-        }
-    }
-}
-
-string CodeGen_GLSLBase::print_type(Type type, AppendSpaceIfNeeded space_option) {
-    ostringstream oss;
-    type = map_type(type);
-    if (type.is_scalar()) {
-        if (type.is_float()) {
-            oss << "float";
-        } else if (type.is_bool()) {
-            oss << "bool";
-        } else if (type.is_int()) {
-            oss << "int";
-        } else if (type.is_uint()) {
-            oss << "uint";
-        } else {
-            internal_error << "GLSL: invalid type '" << type << "' encountered.\n";
-        }
-    } else {
-        if (type.is_float()) {
-            // no prefix for float vectors
-        } else if (type.is_bool()) {
-            oss << "b";
-        } else if (type.is_int()) {
-            oss << "i";
-        } else if (type.is_uint()) {
-            oss << "u";
-        } else {
-            internal_error << "GLSL: invalid type '" << type << "' encountered.\n";
-        }
-        oss << "vec" << type.lanes();
-    }
-
-    if (space_option == AppendSpace) {
-        oss << " ";
-    }
-
-    return oss.str();
-}
-
-// The following comparisons are defined for ivec and vec
-// types, so we don't use call_builtin
-void CodeGen_GLSLBase::visit(const EQ *op) {
-    if (op->type.is_vector()) {
-        print_expr(Call::make(op->type, "equal", {op->a, op->b}, Call::Extern));
-    } else {
-        CodeGen_C::visit(op);
-    }
-}
-
-void CodeGen_GLSLBase::visit(const NE *op) {
-    if (op->type.is_vector()) {
-        print_expr(Call::make(op->type, "notEqual", {op->a, op->b}, Call::Extern));
-    } else {
-        CodeGen_C::visit(op);
-    }
-}
-
-void CodeGen_GLSLBase::visit(const LT *op) {
-    if (op->type.is_vector()) {
-        print_expr(Call::make(op->type, "lessThan", {op->a, op->b}, Call::Extern));
-    } else {
-        CodeGen_C::visit(op);
-    }
-}
-
-void CodeGen_GLSLBase::visit(const LE *op) {
-    if (op->type.is_vector()) {
-        print_expr(Call::make(op->type, "lessThanEqual", {op->a, op->b}, Call::Extern));
-    } else {
-        CodeGen_C::visit(op);
-    }
-}
-
-void CodeGen_GLSLBase::visit(const GT *op) {
-    if (op->type.is_vector()) {
-        print_expr(Call::make(op->type, "greaterThan", {op->a, op->b}, Call::Extern));
-    } else {
-        CodeGen_C::visit(op);
-    }
-}
-
-void CodeGen_GLSLBase::visit(const GE *op) {
-    if (op->type.is_vector()) {
-        print_expr(Call::make(op->type, "greaterThanEqual", {op->a, op->b}, Call::Extern));
-    } else {
-        CodeGen_C::visit(op);
-    }
-}
-
-void CodeGen_GLSLBase::visit(const Shuffle *op) {
-    // The halide Shuffle represents the llvm intrinisc
-    // shufflevector, however, for GLSL its use is limited to swizzling
-    // up to a four channel vec type.
-
-    internal_assert(op->vectors.size() == 1);
-
-    int shuffle_lanes = op->type.lanes();
-    internal_assert(shuffle_lanes <= 4);
-
-    string expr = print_expr(op->vectors[0]);
-
-    // Create a swizzle expression for the shuffle
-    string swizzle;
-    for (int i = 0; i != shuffle_lanes; ++i) {
-        int channel = op->indices[i];
-        internal_assert(channel < 4) << "Shuffle of invalid channel";
-        swizzle += get_lane_suffix(channel);
-    }
-
-    print_assignment(op->type, expr + "." + swizzle);
-}
-
-// Identifiers containing double underscores '__' are reserved in GLSL, so we
-// have to use a different name mangling scheme than in the C code generator.
-string CodeGen_GLSLBase::print_name(const string &name) {
-    const string mangled = CodeGen_C::print_name(name);
-    return replace_all(mangled, "__", "XX");
-}
-
-void CodeGen_GLSLBase::visit(const Cast *op) {
-    Type value_type = op->value.type();
-    // If both types are represented by the same GLSL type, no explicit cast
-    // is necessary.
-    if (map_type(op->type) == map_type(value_type)) {
-        Expr value = op->value;
-        if (value_type.code() == Type::Float) {
-            // float->int conversions may need explicit truncation if an
-            // integer type is embedded into a float. (Note: overflows are
-            // considered undefined behavior, so we do nothing about values
-            // that are out of range of the target type.)
-            if (op->type.code() == Type::UInt) {
-                value = simplify(floor(value));
-            } else if (op->type.code() == Type::Int) {
-                value = simplify(trunc(value));
-            }
-        }
-        // FIXME: Overflow is not UB for most Halide types
-        // https://github.com/halide/Halide/issues/4975
-        value.accept(this);
-    } else {
-        Type target_type = map_type(op->type);
-        print_assignment(target_type, print_type(target_type) + "(" + print_expr(op->value) + ")");
-    }
-}
-
-//
-// CodeGen_GLSL
-//
-
-CodeGen_GLSL::CodeGen_GLSL(std::ostream &s, const Target &t)
-    : CodeGen_GLSLBase(s, t) {
-    builtin["trunc_f32"] = "_trunc_f32";
-
-    // TODO: Add emulation for these builtin functions
-    //       which are available only for GL 3.x (GLSL >= 130)
-    builtin.erase("isnan");
-    builtin.erase("round_f32");
-    builtin.erase("sinh_f32");
-    builtin.erase("cosh_f32");
-    builtin.erase("tanh_f32");
-    builtin.erase("asinh_f32");
-    builtin.erase("acosh_f32");
-    builtin.erase("atanh_f32");
-
-    // TODO: Check OpenGL version then determine support_* variables value
-    support_native_uint = false;
-    support_int_to_float_implicit_conversion = false;
-    support_integer_division_rounding = false;
-    // functions that support ivecs
-    support_non_float_type_builtin.insert("equal");
-    support_non_float_type_builtin.insert("notEqual");
-    support_non_float_type_builtin.insert("lessThan");
-    support_non_float_type_builtin.insert("lessThanEqual");
-    support_non_float_type_builtin.insert("greaterThan");
-    support_non_float_type_builtin.insert("greaterThanEqual");
-}
-
-// Copy back from commit #60442cf9eb
-void CodeGen_GLSL::visit(const Div *op) {
-    if (!support_integer_division_rounding && (op->type.is_int() || op->type.is_uint())) {
-        // Halide's integer division is defined to round according to
-        // the sign of the denominator. Since the rounding behavior of
-        // GLSL's integer division is undefined, emulate the correct
-        // behavior using floating point arithmetic.
-        Type float_type = Float(32, op->type.lanes());
-        // To avoid rounding woes, aim for a floating point value that
-        // should not be close to an integer. If we divide the range
-        // [0, 1, 2, 3] by 4, we want to get floating point values
-        // [1/8, 3/8, 5/8, 7/8]. This can be achieved by adding 0.5 to
-        // the numerator.
-        Expr val = Div::make(Cast::make(float_type, op->a) + 0.5f, Cast::make(float_type, op->b));
-        string float_result = print_expr(simplify(val));
-        val = Variable::make(float_type, float_result);
-        Expr zero = make_zero(op->type);
-        string a = print_expr(op->a);
-        string b = print_expr(op->b);
-        Expr a_var = is_const(op->a) ? op->a : Variable::make(op->type, a);
-        Expr b_var = is_const(op->b) ? op->b : Variable::make(op->type, b);
-        Expr equiv = select(b_var == zero, zero,
-                            b_var > zero, Call::make(op->type, "floor_f32", {val}, Call::Extern),
-                            Call::make(op->type, "ceil_f32", {val}, Call::Extern));
-        if (op->type.bits() >= 32) {
-            // A float isn't precise enough to produce the correct int
-            // in the case where the denominator is one.
-            equiv = select(b_var == make_one(op->type), a_var, equiv);
-        }
-        print_expr(simplify(equiv));
-    } else {
-        CodeGen_GLSLBase::visit(op);
-    }
-}
-
-void CodeGen_GLSL::visit(const Let *op) {
-
-    if (op->name.find(".varying") != string::npos) {
-
-        // Skip let statements for varying attributes
-        op->body.accept(this);
-
-        return;
-    }
-
-    CodeGen_C::visit(op);
-}
-
-void CodeGen_GLSL::visit(const For *loop) {
-    user_assert(loop->for_type != ForType::GPULane)
-        << "The GLSL backend does not support the gpu_lanes() scheduling directive.";
-
-    if (ends_with(loop->name, ".__block_id_x") ||
-        ends_with(loop->name, ".__block_id_y")) {
-        internal_assert(loop->for_type == ForType::GPUBlock)
-            << "kernel loop must be gpu block\n";
-
-        debug(1) << "Dropping loop " << loop->name << " (" << loop->min << ", " << loop->extent << ")\n";
-
-        string idx;
-        if (ends_with(loop->name, ".__block_id_x")) {
-            idx = "int(_varyingf0[0])";
-        } else if (ends_with(loop->name, ".__block_id_y")) {
-            idx = "int(_varyingf0[1])";
-        }
-        stream << get_indent() << print_type(Int(32)) << " " << print_name(loop->name) << " = " << idx << ";\n";
-        loop->body.accept(this);
-    } else {
-        user_assert(loop->for_type != ForType::Parallel) << "GLSL: parallel loops aren't allowed inside kernel.\n";
-        CodeGen_C::visit(loop);
-    }
-}
-
-vector<Expr> evaluate_vector_select(const Select *op) {
-    const int lanes = op->type.lanes();
-    vector<Expr> result(lanes);
-    for (int i = 0; i < lanes; i++) {
-        Expr cond = extract_lane(op->condition, i);
-        Expr true_value = extract_lane(op->true_value, i);
-        Expr false_value = extract_lane(op->false_value, i);
-
-        if (is_const(cond)) {
-            result[i] = is_const_one(cond) ? true_value : false_value;
-        } else {
-            result[i] = Select::make(cond, true_value, false_value);
-        }
-    }
-    return result;
-}
-
-void CodeGen_GLSL::visit(const Select *op) {
-    string id_value;
-    if (op->condition.type().is_scalar()) {
-        id_value = unique_name('_');
-        stream << get_indent() << print_type(op->type) << " " << id_value << ";\n";
-        string cond = print_expr(op->condition);
-        stream << get_indent() << "if (" << cond << ") ";
-        open_scope();
-        {
-            string true_val = print_expr(op->true_value);
-            stream << get_indent() << id_value << " = " << true_val << ";\n";
-        }
-        close_scope("");
-
-        stream << get_indent() << "else ";
-        open_scope();
-        {
-            string false_val = print_expr(op->false_value);
-            stream << get_indent() << id_value << " = " << false_val << ";\n";
-        }
-        close_scope("");
-    } else {
-        // Selects with vector conditions are typically used for constructing
-        // vector types. If the select condition can be evaluated at
-        // compile-time (which is often the case), we can built the vector
-        // directly without lowering to a sequence of "if" statements.
-        internal_assert(op->condition.type().lanes() == op->type.lanes());
-        int lanes = op->type.lanes();
-        vector<Expr> result = evaluate_vector_select(op);
-        vector<string> ids(lanes);
-        for (int i = 0; i < lanes; i++) {
-            ids[i] = print_expr(result[i]);
-        }
-        id_value = unique_name('_');
-        stream << get_indent() << print_type(op->type) << " " << id_value << " = "
-               << print_type(op->type) << "(";
-        for (int i = 0; i < lanes; i++) {
-            stream << ids[i] << ((i < lanes - 1) ? ", " : ");\n");
-        }
-    }
-
-    id = id_value;
-}
-
-string CodeGen_GLSL::get_vector_suffix(const Expr &e) {
-    vector<Expr> matches;
-    Expr w = Variable::make(Int(32), "*");
-
-    // The vectorize pass will insert a ramp in the color dimension argument.
-    const Ramp *r = e.as<Ramp>();
-    if (r && is_const_zero(r->base) && is_const_one(r->stride) && r->lanes == 4) {
-        // No suffix is needed when accessing a full RGBA vector.
-        return "";
-    } else if (r && is_const_zero(r->base) && is_const_one(r->stride) && r->lanes == 3) {
-        return ".rgb";
-    } else if (r && is_const_zero(r->base) && is_const_one(r->stride) && r->lanes == 2) {
-        return ".rg";
-    } else {
-        // GLSL 1.0 Section 5.5 supports subscript based vector indexing
-        internal_assert(e.type().is_scalar());
-        string id = print_expr(e);
-        if (e.type() != Int(32)) {
-            id = "int(" + id + ")";
-        }
-        return string("[" + id + "]");
-    }
-}
-
-vector<string> CodeGen_GLSL::print_lanes(const Expr &e) {
-    int l = e.type().lanes();
-    internal_assert(e.type().is_vector());
-    vector<string> result(l);
-    if (const Broadcast *b = e.as<Broadcast>()) {
-        string val = print_expr(b->value);
-        for (int i = 0; i < l; i++) {
-            result[i] = val;
-        }
-    } else if (const Ramp *r = e.as<Ramp>()) {
-        for (int i = 0; i < l; i++) {
-            result[i] = print_expr(simplify(r->base + i * r->stride));
-        }
-    } else {
-        string val = print_expr(e);
-        for (int i = 0; i < l; i++) {
-            result[i] = val + "[" + std::to_string(i) + "]";
-        }
-    }
-    return result;
-}
-
-void CodeGen_GLSL::visit(const Load *op) {
-    user_assert(is_const_one(op->predicate)) << "GLSL: predicated load is not supported.\n";
-    if (scalar_vars.contains(op->name)) {
-        internal_assert(is_const_zero(op->index));
-        id = print_name(op->name);
-    } else if (vector_vars.contains(op->name)) {
-        id = print_name(op->name) + get_vector_suffix(op->index);
-    } else if (op->type.is_scalar()) {
-        string idx = print_expr(op->index);
-        print_assignment(op->type, print_name(op->name) + "[" + idx + "]");
-    } else {
-        vector<string> indices = print_lanes(op->index);
-        ostringstream rhs;
-        rhs << print_type(op->type) << "(";
-        for (int i = 0; i < op->type.lanes(); i++) {
-            if (i > 0) {
-                rhs << ", ";
-            }
-            rhs << print_name(op->name) << "[" + indices[i] + "]";
-        }
-        rhs << ")";
-        print_assignment(op->type, rhs.str());
-    }
-}
-
-void CodeGen_GLSL::visit(const Store *op) {
-    user_assert(is_const_one(op->predicate)) << "GLSL: predicated store is not supported.\n";
-    if (scalar_vars.contains(op->name)) {
-        internal_assert(is_const_zero(op->index));
-        string val = print_expr(op->value);
-        stream << get_indent() << print_name(op->name) << " = " << val << ";\n";
-    } else if (vector_vars.contains(op->name)) {
-        string val = print_expr(op->value);
-        stream << get_indent() << print_name(op->name) << get_vector_suffix(op->index)
-               << " = " << val << ";\n";
-    } else if (op->value.type().is_scalar()) {
-        string val = print_expr(op->value);
-        string idx = print_expr(op->index);
-        stream << get_indent() << print_name(op->name) << "[" << idx << "] = " << val << ";\n";
-    } else {
-        vector<string> indices = print_lanes(op->index);
-        vector<string> values = print_lanes(op->value);
-        for (int i = 0; i < op->value.type().lanes(); i++) {
-            stream << get_indent() << print_name(op->name)
-                   << "[" << indices[i] << "] = "
-                   << values[i] << ";\n";
-        }
-    }
-}
-
-void CodeGen_GLSL::visit(const Evaluate *op) {
-    print_expr(op->value);
-}
-
-namespace {
-class AllAccessConstant : public IRVisitor {
-    using IRVisitor::visit;
-
-    void visit(const Load *op) override {
-        if (op->name == buf && !is_const(op->index)) {
-            result = false;
-        }
-        IRVisitor::visit(op);
-    }
-
-    void visit(const Store *op) override {
-        if (op->name == buf && !is_const(op->index)) {
-            result = false;
-        }
-        IRVisitor::visit(op);
-    }
-
-public:
-    bool result = true;
-    string buf;
-};
-}  // namespace
-
-void CodeGen_GLSL::visit(const Allocate *op) {
-    int32_t size = op->constant_allocation_size();
-    user_assert(size) << "Allocations inside GLSL kernels must be constant-sized\n";
-
-    // Check if all access to the allocation uses a constant index
-    AllAccessConstant all_access_constant;
-    all_access_constant.buf = op->name;
-    op->body.accept(&all_access_constant);
-
-    stream << get_indent();
-    if (size == 1) {
-        // We can use a variable
-        stream << print_type(op->type) << " " << print_name(op->name) << ";\n";
-        ScopedBinding<int> p(scalar_vars, op->name, 0);
-        op->body.accept(this);
-    } else if (size <= 4 && all_access_constant.result) {
-        // We can just use a vector variable
-        stream << print_type(op->type.with_lanes(size)) << " " << print_name(op->name) << ";\n";
-        ScopedBinding<int> p(vector_vars, op->name, 0);
-        op->body.accept(this);
-    } else {
-        stream << print_type(op->type) << " " << print_name(op->name) << "[" << size << "];\n";
-        op->body.accept(this);
-    }
-}
-
-void CodeGen_GLSL::visit(const Free *op) {
-}
-
-void CodeGen_GLSL::visit(const AssertStmt *) {
-    internal_error << "GLSL: unexpected Assertion node encountered.\n";
-}
-
-void CodeGen_GLSL::visit(const Ramp *op) {
-    ostringstream rhs;
-    rhs << print_type(op->type) << "(";
-
-    if (op->lanes > 4) {
-        internal_error << "GLSL: ramp lanes " << op->lanes << " is not supported\n";
-    }
-
-    rhs << print_expr(op->base);
-
-    for (int i = 1; i < op->lanes; ++i) {
-        rhs << ", " << print_expr(Add::make(op->base, Mul::make(i, op->stride)));
-    }
-
-    rhs << ")";
-    print_assignment(op->type, rhs.str());
-}
-
-void CodeGen_GLSL::visit(const Broadcast *op) {
-    ostringstream rhs;
-    rhs << print_type(op->type) << "(" << print_expr(op->value) << ")";
-    print_assignment(op->type, rhs.str());
-}
-
-void CodeGen_GLSL::visit(const Atomic *op) {
-    // Floating point atomics can be tricky as there are no floating point atomics
-    // operations, and GLSL does not allow converting a  floating point buffer to an
-    // integer buffer.
-    // Plus, OpenGL supports atomics starting from 4.3, but Halide doesn't distinguish
-    // between OpenGL versions yet.
-    user_assert(false) << "GLSL: atomics are not supported.\n";
-}
-
-void CodeGen_GLSL::add_kernel(const Stmt &stmt, const string &name,
-                              const vector<DeviceArgument> &args) {
-
-    // This function produces fragment shader source for the halide statement.
-    // The corresponding vertex shader will be generated by the halide opengl
-    // runtime based on the arguments passed in comments below. Host codegen
-    // outputs expressions that are evaluated at runtime to produce vertex data
-    // and varying attribute values at the vertices.
-
-    // Emit special header that declares the kernel name and its arguments.
-    // There is currently no standard way of passing information from the code
-    // generator to the runtime, and the information Halide passes to the
-    // runtime are fairly limited.  We use these special comments to know the
-    // data types of arguments and whether textures are used for input or
-    // output.
-
-    // Keep track of the number of uniform and varying attributes
-    int num_uniform_floats = 0;
-    int num_uniform_ints = 0;
-
-    // The spatial x and y coordinates are always passed in the first two
-    // varying float attribute slots
-    int num_varying_floats = 2;
-
-    ostringstream header;
-    header << "/// KERNEL " << name << "\n";
-    for (size_t i = 0; i < args.size(); i++) {
-        if (args[i].is_buffer) {
-            Type t = args[i].type.element_of();
-
-            user_assert(args[i].read != args[i].write) << "GLSL: buffers may only be read OR written inside a kernel loop.\n";
-            string type_name;
-            if (t == UInt(8)) {
-                type_name = "uint8_t";
-            } else if (t == UInt(16)) {
-                type_name = "uint16_t";
-            } else if (t == Float(32)) {
-                type_name = "float";
-            } else {
-                user_error << "GLSL: buffer " << args[i].name << " has invalid type " << t << ".\n";
-            }
-            header << "/// " << (args[i].read ? "IN_BUFFER " : "OUT_BUFFER ")
-                   << type_name << " " << print_name(args[i].name) << "\n";
-        } else if (ends_with(args[i].name, ".varying")) {
-            header << "/// VARYING "
-                   // GLSL requires that varying attributes are float. Integer
-                   // expressions for vertex attributes are cast to float during
-                   // host codegen
-                   << "float " << print_name(args[i].name) << " varyingf" << args[i].packed_index / 4 << "[" << args[i].packed_index % 4 << "]\n";
-            ++num_varying_floats;
-        } else if (args[i].type.is_float()) {
-            header << "/// UNIFORM "
-                   << CodeGen_C::print_type(args[i].type) << " "  // NOLINT: Allow call to CodeGen_C::print_type
-                   << print_name(args[i].name) << " uniformf" << args[i].packed_index / 4 << "[" << args[i].packed_index % 4 << "]\n";
-            ++num_uniform_floats;
-        } else if (args[i].type.is_int()) {
-            header << "/// UNIFORM "
-                   << CodeGen_C::print_type(args[i].type) << " "  // NOLINT: Allow call to CodeGen_C::print_type
-                   << print_name(args[i].name) << " uniformi" << args[i].packed_index / 4 << "[" << args[i].packed_index % 4 << "]\n";
-            ++num_uniform_ints;
-        }
-    }
-
-    // Compute the number of vec4's needed to pack the arguments
-    num_varying_floats = (num_varying_floats + 3) / 4;
-    num_uniform_floats = (num_uniform_floats + 3) / 4;
-    num_uniform_ints = (num_uniform_ints + 3) / 4;
-
-    stream << header.str();
-
-    // Specify default float precision when compiling for OpenGL ES.
-    // TODO: emit correct #version
-    if (is_opengl_es(target)) {
-        stream << "#ifdef GL_FRAGMENT_PRECISION_HIGH\n"
-               << "precision highp float;\n"
-               << "#endif\n";
-    }
-
-    // Declare input textures and variables
-    for (size_t i = 0; i < args.size(); i++) {
-        if (args[i].is_buffer && args[i].read) {
-            stream << "uniform sampler2D " << print_name(args[i].name) << ";\n";
-        }
-    }
-
-    for (int i = 0; i != num_varying_floats; ++i) {
-        stream << "varying vec4 _varyingf" << i << ";\n";
-    }
-
-    for (int i = 0; i != num_uniform_floats; ++i) {
-        stream << "uniform vec4 _uniformf" << i << ";\n";
-    }
-
-    for (int i = 0; i != num_uniform_ints; ++i) {
-        stream << "uniform ivec4 _uniformi" << i << ";\n";
-    }
-
-    // Output additional builtin functions.
-    stream << "float _trunc_f32(float x) {\n"
-              "  return floor(abs(x)) * sign(x);\n"
-              "}\n";
-
-    stream << "void main() {\n";
-    indent += 2;
-
-    // Unpack the uniform and varying parameters
-    for (size_t i = 0; i < args.size(); i++) {
-        if (args[i].is_buffer) {
-            continue;
-        } else if (ends_with(args[i].name, ".varying")) {
-            stream << get_indent() << "float " << print_name(args[i].name)
-                   << " = _varyingf" << args[i].packed_index / 4
-                   << "[" << args[i].packed_index % 4 << "];\n";
-        } else if (args[i].type.is_float()) {
-            stream << get_indent() << print_type(args[i].type) << " "
-                   << print_name(args[i].name)
-                   << " = _uniformf" << args[i].packed_index / 4
-                   << "[" << args[i].packed_index % 4 << "];\n";
-        } else if (args[i].type.is_int()) {
-            stream << get_indent() << print_type(args[i].type) << " "
-                   << print_name(args[i].name)
-                   << " = _uniformi" << args[i].packed_index / 4
-                   << "[" << args[i].packed_index % 4 << "];\n";
-        }
-    }
-
-    print(stmt);
-    indent -= 2;
-    stream << "}\n";
-}
-
-namespace {
-// Replace all temporary variables names like _1234 with '$'. This is done to
-// make the individual tests below self-contained.
-string normalize_temporaries(const string &s) {
-    string result;
-    for (size_t i = 0; i < s.size();) {
-        if (s[i] == '_') {
-            result += '$';
-            for (i++; i < s.size() && isdigit(s[i]); i++) {
-            }
-        } else {
-            result += s[i++];
-        }
-    }
-    return result;
-}
-
-void check(Expr e, const string &result) {
-    ostringstream source;
-    CodeGen_GLSL cg(source, Target());
-    if (e.as<FloatImm>() || e.as<IntImm>()) {
-        // Hack: CodeGen_C doesn't treat immediates like other expressions, so
-        // wrap them to obtain useful output.
-        e = Halide::print(e);
-    }
-    source.str("");
-    source.clear();
-    Evaluate::make(e).accept(&cg);
-    string src = normalize_temporaries(source.str());
-    if (!ends_with(src, result)) {
-        internal_error
-            << "Codegen failed for " << e << "\n"
-            << "  Correct source code:\n"
-            << result
-            << "  Actual source code:\n"
-            << src;
-    }
-}
-
-}  // namespace
-
-void CodeGen_GLSL::test() {
-    vector<Expr> e;
-
-    // Check that float constants are printed correctly.
-    check(1.0f, "float $ = 1.0;\n");
-    check(1.0f + std::numeric_limits<float>::epsilon(), "float $ = 1.00000012;\n");
-    check(1.19209290e-07f, "float $ = 1.1920929e-07;\n");
-    check(8388608.f, "float $ = 8388608.0;\n");
-    check(-2.1e19f, "float $ = -20999999189405401088.0;\n");
-    check(3.1415926536f, "float $ = 3.14159274;\n");
-
-    // Uint8 is embedded in GLSL floats, so no cast necessary
-    check(cast<float>(Variable::make(UInt(8), "x") * 1.0f),
-          "float $ = $x * 1.0;\n");
-    // But truncation is necessary for the reverse direction
-    check(cast<uint8_t>(Variable::make(Float(32), "x")),
-          "float $ = floor($x);\n");
-
-    check(Min::make(Expr(1), Expr(5)),
-          "float $ = min(1.0, 5.0);\n"
-          "int $ = int($);\n");
-
-    check(Max::make(Expr(1), Expr(5)),
-          "float $ = max(1.0, 5.0);\n"
-          "int $ = int($);\n");
-
-    check(Max::make(Broadcast::make(1, 4), Broadcast::make(5, 4)),
-          "vec4 $ = vec4(1.0);\n"
-          "vec4 $ = vec4(5.0);\n"
-          "vec4 $ = max($, $);\n"
-          "ivec4 $ = ivec4($);\n");
-
-    check(Variable::make(Int(32), "x") / Expr(3),
-          "float $ = float($x);\n"
-          "float $ = $ * 0.333333343;\n"
-          "float $ = $ + 0.166666672;\n"
-          "float $ = floor($);\n"
-          "int $ = int($);\n");
-    // check(Variable::make(Int(32, 4), "x") / Variable::make(Int(32, 4), "y"),
-    //       "vec4 $ = vec4($x);\n"
-    //       "vec4 $ = vec4($y);\n"
-    //       "vec4 $ = $ / $;\n"
-    //       "vec4 $ = floor($);\n"
-    //       "ivec4 $ = ivec4($);\n");
-    check(Variable::make(Float(32, 4), "x") / Variable::make(Float(32, 4), "y"),
-          "vec4 $ = $x / $y;\n");
-
-    // Integer lerp with integer weight
-    check(lerp(cast<uint8_t>(0), cast<uint8_t>(255), cast<uint8_t>(127)),
-          "float $ = mix(0.0, 255.0, 0.498039216);\n"
-          "float $ = $ + 0.5;\n"
-          "float $ = floor($);\n");
-
-    // Integer lerp with float weight
-    check(lerp(cast<uint8_t>(0), cast<uint8_t>(255), 0.3f),
-          "float $ = mix(0.0, 255.0, 0.298039228);\n"
-          "float $ = $ + 0.5;\n"
-          "float $ = floor($);\n");
-
-    // Floating point lerp
-    check(lerp(0.0f, 1.0f, 0.3f),
-          "float $ = mix(0.0, 1.0, 0.300000012);\n");
-
-    // Vectorized lerp
-    check(lerp(Variable::make(Float(32, 4), "x"), Variable::make(Float(32, 4), "y"), Broadcast::make(0.25f, 4)),
-          "vec4 $ = vec4(0.25);\n"
-          "vec4 $ = mix($x, $y, $);\n");
-
-    // Sin with scalar arg
-    check(sin(3.0f), "float $ = sin(3.0);\n");
-
-    // Sin with vector arg
-    check(Call::make(Float(32, 4), "sin_f32", {Broadcast::make(1.f, 4)}, Internal::Call::Extern),
-          "vec4 $ = vec4(1.0);\n"
-          "vec4 $ = sin($);\n");
-
-    // use float version of abs in GLSL
-    check(abs(Variable::make(Int(32), "x")),
-          "float $ = float($x);\n"
-          "float $ = abs($);\n"
-          "int $ = int($);\n");
-
-    check(Halide::print(3.0f), "float $ = 3.0;\n");
-
-    // Test rounding behavior of integer division.
-    // The latest version of integer division is too complicated to list here
-    // check(Variable::make(Int(32), "x") / Variable::make(Int(32), "y"),
-    //       "float $ = float($x);\n"
-    //       "float $ = float($y);\n"
-    //       "float $ = $ / $;\n"
-    //       "float $ = floor($);\n"
-    //       "int $ = int($);\n");
-
-    // Select with scalar condition
-    check(Select::make(EQ::make(Variable::make(Float(32), "x"), 1.0f),
-                       Broadcast::make(1.f, 4),
-                       Broadcast::make(2.f, 4)),
-          "vec4 $;\n"
-          "bool $ = $x == 1.0;\n"
-          "if ($) {\n"
-          " vec4 $ = vec4(1.0);\n"
-          " $ = $;\n"
-          "}\n"
-          "else {\n"
-          " vec4 $ = vec4(2.0);\n"
-          " $ = $;\n"
-          "}\n");
-
-    // Select with vector condition
-    check(Select::make(EQ::make(Ramp::make(-1, 1, 4), Broadcast::make(0, 4)),
-                       Broadcast::make(1.f, 4),
-                       Broadcast::make(2.f, 4)),
-          "vec4 $ = vec4(2.0, 1.0, 2.0, 2.0);\n");
-
-    check(log(1.0f), "float $ = log(1.0);\n");
-    check(exp(1.0f), "float $ = exp(1.0);\n");
-
-    // Integer powers are expanded
-    check(pow(1.4f, 2), "float $ = 1.39999998 * 1.39999998;\n");
-    check(pow(1.0f, 2.1f), "float $ = pow(1.0, 2.0999999);\n");
-
-    std::cout << "CodeGen_GLSL test Success!\n";
-}
-
-}  // namespace Internal
-}  // namespace Halide
diff --git a/src/CodeGen_OpenGL_Dev.h b/src/CodeGen_OpenGL_Dev.h
deleted file mode 100644
index b180b5e0ef12..000000000000
--- a/src/CodeGen_OpenGL_Dev.h
+++ /dev/null
@@ -1,155 +0,0 @@
-#ifndef HALIDE_CODEGEN_OPENGL_DEV_H
-#define HALIDE_CODEGEN_OPENGL_DEV_H
-
-/** \file
- * Defines the code-generator for producing GLSL kernel code
- */
-
-#include <map>
-#include <set>
-#include <sstream>
-
-#include "CodeGen_C.h"
-#include "CodeGen_GPU_Dev.h"
-#include "Target.h"
-
-namespace Halide {
-namespace Internal {
-
-class CodeGen_GLSL;
-
-class CodeGen_OpenGL_Dev : public CodeGen_GPU_Dev {
-public:
-    CodeGen_OpenGL_Dev(const Target &target);
-    ~CodeGen_OpenGL_Dev() override;
-
-    // CodeGen_GPU_Dev interface
-    void add_kernel(Stmt stmt, const std::string &name,
-                    const std::vector<DeviceArgument> &args) override;
-
-    void init_module() override;
-
-    std::vector<char> compile_to_src() override;
-
-    std::string get_current_kernel_name() override;
-
-    void dump() override;
-
-    std::string api_unique_name() override {
-        return "opengl";
-    }
-
-private:
-    CodeGen_GLSL *glc;
-
-    std::string print_gpu_name(const std::string &name) override;
-
-    std::ostringstream src_stream;
-    std::string cur_kernel_name;
-    Target target;
-};
-
-/**
-  * This class handles GLSL arithmetic, shared by CodeGen_GLSL and CodeGen_OpenGLCompute_C.
-  */
-class CodeGen_GLSLBase : public CodeGen_C {
-public:
-    CodeGen_GLSLBase(std::ostream &s, Target t);
-
-    std::string print_name(const std::string &name) override;
-    std::string print_type(Type type, AppendSpaceIfNeeded space_option = DoNotAppendSpace) override;
-
-protected:
-    using CodeGen_C::visit;
-
-    void visit(const Cast *) override;
-
-    void visit(const FloatImm *) override;
-    void visit(const UIntImm *) override;
-    void visit(const IntImm *) override;
-
-    void visit(const Max *op) override;
-    void visit(const Min *op) override;
-    void visit(const Call *op) override;
-
-    void visit(const Mod *) override;
-
-    // these have specific functions
-    // in GLSL that operate on vectors
-    void visit(const EQ *) override;
-    void visit(const NE *) override;
-    void visit(const LT *) override;
-    void visit(const LE *) override;
-    void visit(const GT *) override;
-    void visit(const GE *) override;
-
-    void visit(const Shuffle *) override;
-
-    Type map_type(const Type &);
-
-    std::map<std::string, std::string> builtin;
-
-    // empty for GL 3.x and GLCompute which do not care about this (due to implicit conversion)
-    // while GL 2.0 only support a small subset of builtin functions with ivec arguments
-    std::set<std::string> support_non_float_type_builtin;
-
-    // true for GL 3.x (GLSL >= 130 or ESSL >= 300) and GLCompute
-    // false for GL 2.x which does not support uint/uvec
-    bool support_native_uint = true;
-
-    // true for GL 2.1 and 3.x (GLSL == 120, >= 130) and GLCompute
-    // true for GL ES 3.1 with EXT_shader_implicit_conversions
-    // false for GL 2.0 and GL ES 3.0
-    bool support_int_to_float_implicit_conversion = true;
-
-    // it seems that only GLSL ES implicitly does not support rounding of integer division
-    // while GLSL specification does not talk about this issue
-    // see GLSL ES Specification 1.00, issues 10.28, Rounding of Integer Division
-    // see GLSL ES Specification 3.00, issues 12.33, Rounding of Integer Division
-    bool support_integer_division_rounding = true;
-};
-
-/** Compile one statement into GLSL. */
-class CodeGen_GLSL : public CodeGen_GLSLBase {
-public:
-    CodeGen_GLSL(std::ostream &s, const Target &t);
-
-    void add_kernel(const Stmt &stmt,
-                    const std::string &name,
-                    const std::vector<DeviceArgument> &args);
-
-    static void test();
-
-protected:
-    using CodeGen_GLSLBase::visit;
-
-    void visit(const Div *) override;
-
-    void visit(const Let *) override;
-    void visit(const For *) override;
-    void visit(const Select *) override;
-
-    void visit(const Load *) override;
-    void visit(const Store *) override;
-    void visit(const Allocate *) override;
-    void visit(const Free *) override;
-
-    void visit(const AssertStmt *) override;
-    void visit(const Ramp *op) override;
-    void visit(const Broadcast *) override;
-
-    void visit(const Evaluate *) override;
-    void visit(const Atomic *) override;
-
-private:
-    std::string get_vector_suffix(const Expr &e);
-
-    std::vector<std::string> print_lanes(const Expr &expr);
-
-    Scope<int> scalar_vars, vector_vars;
-};
-
-}  // namespace Internal
-}  // namespace Halide
-
-#endif