diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml
index e7d50e4d2891..ec763c9b0935 100644
--- a/.github/workflows/presubmit.yml
+++ b/.github/workflows/presubmit.yml
@@ -49,6 +49,5 @@ jobs:
           (cd test/error               && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ])
           (cd test/generator           && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ])
           (cd test/failing_with_issue  && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ])
-          (cd test/opengl              && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ])
           (cd test/performance         && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ])
           (cd test/warning             && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ])
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9e288dd687d9..f7e1dbea8c97 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -148,12 +148,6 @@ jobs:
           libpng-dev \
           ninja-build
 
-        # TODO(srj): OpenGL is only needed to build the opengl tests (which we don't even run)...
-        sudo apt-get install \
-          freeglut3-dev \
-          libglu1-mesa-dev \
-          mesa-common-dev
-
     - name: Configure MacOS Host
       if: startsWith(matrix.host_os, 'macos')
       shell: bash
@@ -193,12 +187,6 @@ jobs:
           libjpeg-dev:i386 \
           libpng-dev:i386 \
 
-        # TODO(srj): OpenGL is only needed to build the opengl tests (which we don't even run)...
-        sudo apt-get install \
-          freeglut3-dev:i386 \
-          libglu1-mesa-dev:i386 \
-          mesa-common-dev:i386
-
     - name: Configure Arm32 Crosscompilation
       if: matrix.target_os == 'linux' && matrix.target_arch == 'arm' && matrix.target_bits == 32
       shell: bash
@@ -449,9 +437,8 @@ jobs:
         TEST_GROUPS_SERIAL="tutorial"
 
         # performance is never going to be reliable on VMs.
-        # opengl won't work on the buildbots.
         # auto_schedule is just flaky.
-        TEST_GROUPS_BROKEN="performance opengl auto_schedule"
+        TEST_GROUPS_BROKEN="performance auto_schedule"
 
         if [[ ${{matrix.target_bits}} == 32 ]]; then
           # TODO: Skip testing apps on 32-bit systems for now;
@@ -487,9 +474,8 @@ jobs:
         TEST_GROUPS_SERIAL="tutorial"
 
         # performance is never going to be reliable on VMs.
-        # opengl won't work on the buildbots.
         # auto_schedule is just flaky.
-        TEST_GROUPS_BROKEN="performance|opengl|auto_schedule"
+        TEST_GROUPS_BROKEN="performance|auto_schedule"
 
         export TEST_TMPDIR="${HALIDE_TEMP_DIR}"
         cd ${HALIDE_BUILD_DIR}
diff --git a/Makefile b/Makefile
index 8ad0a3970a0a..921f8e388e3b 100644
--- a/Makefile
+++ b/Makefile
@@ -466,7 +466,6 @@ SOURCE_FILES = \
   ImageParam.cpp \
   InferArguments.cpp \
   InjectHostDevBufferCopies.cpp \
-  InjectOpenGLIntrinsics.cpp \
   Inline.cpp \
   InlineReductions.cpp \
   IntegerDivisionTable.cpp \
@@ -560,7 +559,6 @@ SOURCE_FILES = \
   UnsafePromises.cpp \
   Util.cpp \
   Var.cpp \
-  VaryingAttributes.cpp \
   VectorizeLoops.cpp \
   WasmExecutor.cpp \
   WrapCalls.cpp
@@ -645,7 +643,6 @@ HEADER_FILES = \
   ImageParam.h \
   InferArguments.h \
   InjectHostDevBufferCopies.h \
-  InjectOpenGLIntrinsics.h \
   Inline.h \
   InlineReductions.h \
   IntegerDivisionTable.h \
@@ -728,7 +725,6 @@ HEADER_FILES = \
   UnsafePromises.h \
   Util.h \
   Var.h \
-  VaryingAttributes.h \
   VectorizeLoops.h \
   WrapCalls.h
 
@@ -779,7 +775,6 @@ RUNTIME_CPP_COMPONENTS = \
   msan \
   msan_stubs \
   opencl \
-  opengl \
   openglcompute \
   opengl_egl_context \
   opengl_glx_context \
@@ -851,7 +846,6 @@ RUNTIME_EXPORTED_INCLUDES = $(INCLUDE_DIR)/HalideRuntime.h \
                             $(INCLUDE_DIR)/HalideRuntimeHexagonDma.h \
                             $(INCLUDE_DIR)/HalideRuntimeHexagonHost.h \
                             $(INCLUDE_DIR)/HalideRuntimeOpenCL.h \
-                            $(INCLUDE_DIR)/HalideRuntimeOpenGL.h \
                             $(INCLUDE_DIR)/HalideRuntimeOpenGLCompute.h \
                             $(INCLUDE_DIR)/HalideRuntimeMetal.h	\
                             $(INCLUDE_DIR)/HalideRuntimeQurt.h \
@@ -1110,14 +1104,11 @@ CORRECTNESS_TESTS = $(shell ls $(ROOT_DIR)/test/correctness/*.cpp) $(shell ls $(
 PERFORMANCE_TESTS = $(shell ls $(ROOT_DIR)/test/performance/*.cpp)
 ERROR_TESTS = $(shell ls $(ROOT_DIR)/test/error/*.cpp)
 WARNING_TESTS = $(shell ls $(ROOT_DIR)/test/warning/*.cpp)
-OPENGL_TESTS := $(shell ls $(ROOT_DIR)/test/opengl/*.cpp)
 GENERATOR_EXTERNAL_TESTS := $(shell ls $(ROOT_DIR)/test/generator/*test.cpp)
 GENERATOR_EXTERNAL_TEST_GENERATOR := $(shell ls $(ROOT_DIR)/test/generator/*_generator.cpp)
 TUTORIALS = $(filter-out %_generate.cpp, $(shell ls $(ROOT_DIR)/tutorial/*.cpp))
 AUTO_SCHEDULE_TESTS = $(shell ls $(ROOT_DIR)/test/auto_schedule/*.cpp)
 
--include $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=$(BUILD_DIR)/test_opengl_%.d)
-
 test_correctness: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=quiet_correctness_%) $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.c=quiet_correctness_%)
 test_performance: $(PERFORMANCE_TESTS:$(ROOT_DIR)/test/performance/%.cpp=performance_%)
 test_error: $(ERROR_TESTS:$(ROOT_DIR)/test/error/%.cpp=error_%)
@@ -1125,7 +1116,6 @@ test_warning: $(WARNING_TESTS:$(ROOT_DIR)/test/warning/%.cpp=warning_%)
 test_tutorial: $(TUTORIALS:$(ROOT_DIR)/tutorial/%.cpp=tutorial_%)
 test_valgrind: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=valgrind_%)
 test_avx512: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=avx512_%)
-test_opengl: $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=opengl_%)
 test_auto_schedule: test_mullapudi2016 test_li2018 test_adams2019
 
 .PHONY: test_correctness_multi_gpu
@@ -1230,7 +1220,6 @@ ALL_TESTS = test_internal test_correctness test_error test_tutorial test_warning
 # For generator tests they time the compile time only. The times are recorded in CSV files.
 time_compilation_correctness: init_time_compilation_correctness $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=time_compilation_test_%)
 time_compilation_performance: init_time_compilation_performance $(PERFORMANCE_TESTS:$(ROOT_DIR)/test/performance/%.cpp=time_compilation_performance_%)
-time_compilation_opengl: init_time_compilation_opengl $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=time_compilation_opengl_%)
 time_compilation_generator: init_time_compilation_generator $(GENERATOR_TESTS:$(ROOT_DIR)/test/generator/%_aottest.cpp=time_compilation_generator_%)
 
 init_time_compilation_%:
@@ -1250,14 +1239,6 @@ build_tests: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=$(BIN_DIR)/c
 	$(GENERATOR_EXTERNAL_TESTS:$(ROOT_DIR)/test/generator/%_jittest.cpp=$(BIN_DIR)/generator_jit_%) \
 	$(AUTO_SCHEDULE_TESTS:$(ROOT_DIR)/test/auto_schedule/%.cpp=$(BIN_DIR)/auto_schedule_%)
 
-# OpenGL doesn't build on every host platform we support (eg. ARM).
-.PHONY: build_opengl_tests
-build_opengl_tests: $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=$(BIN_DIR)/opengl_%)
-
-ifneq ($(WITH_OPENGL),)
-build_tests: build_opengl_tests
-endif
-
 clean_generator:
 	rm -rf $(BIN_DIR)/*.generator
 	rm -rf $(BIN_DIR)/*/runtime.a
@@ -1321,9 +1302,6 @@ $(BIN_DIR)/error_%: $(ROOT_DIR)/test/error/%.cpp $(BIN_DIR)/libHalide.$(SHARED_E
 $(BIN_DIR)/warning_%: $(ROOT_DIR)/test/warning/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h
 	$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@
 
-$(BIN_DIR)/opengl_%: $(ROOT_DIR)/test/opengl/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(INCLUDE_DIR)/HalideRuntime.h $(INCLUDE_DIR)/HalideRuntimeOpenGL.h
-	$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) -I$(SRC_DIR) $(TEST_LD_FLAGS) $(OPENGL_LD_FLAGS) -o $@ -MMD -MF $(BUILD_DIR)/test_opengl_$*.d
-
 # Auto schedule tests that link against libHalide
 $(BIN_DIR)/auto_schedule_%: $(ROOT_DIR)/test/auto_schedule/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h
 	$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@
@@ -1874,11 +1852,6 @@ warning_%: $(BIN_DIR)/warning_%
 	cd $(TMP_DIR) ; $(CURDIR)/$< 2>&1 | egrep --q "^Warning"
 	@-echo
 
-opengl_%: $(BIN_DIR)/opengl_%
-	@-mkdir -p $(TMP_DIR)
-	cd $(TMP_DIR) ; $(CURDIR)/$< 2>&1
-	@-echo
-
 generator_jit_%: $(BIN_DIR)/generator_jit_%
 	@-mkdir -p $(TMP_DIR)
 	cd $(TMP_DIR) ; $(CURDIR)/$<
@@ -1928,9 +1901,6 @@ time_compilation_test_%: $(BIN_DIR)/test_%
 time_compilation_performance_%: $(BIN_DIR)/performance_%
 	$(TIME_COMPILATION) compile_times_performance.csv make -f $(THIS_MAKEFILE) $(@:time_compilation_performance_%=performance_%)
 
-time_compilation_opengl_%: $(BIN_DIR)/opengl_%
-	$(TIME_COMPILATION) compile_times_opengl.csv make -f $(THIS_MAKEFILE) $(@:time_compilation_opengl_%=opengl_%)
-
 time_compilation_generator_%: $(BIN_DIR)/%.generator
 	$(TIME_COMPILATION) compile_times_generator.csv make -f $(THIS_MAKEFILE) $(@:time_compilation_generator_%=$(FILTERS_DIR)/%.a)
 
diff --git a/README.md b/README.md
index 6fe672968347..b80441df5535 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ currently targets:
 
 - CPU architectures: X86, ARM, MIPS, Hexagon, PowerPC
 - Operating systems: Linux, Windows, Mac OS X, Android, iOS, Qualcomm QuRT
-- GPU Compute APIs: CUDA, OpenCL, OpenGL, OpenGL Compute Shaders, Apple Metal,
+- GPU Compute APIs: CUDA, OpenCL, OpenGL Compute Shaders, Apple Metal,
   Microsoft Direct X 12
 
 Rather than being a standalone programming language, Halide is embedded in C++.
@@ -336,140 +336,7 @@ an older XCode which does not default to libc++.
 
 # Halide OpenGL/GLSL backend
 
-Halide's OpenGL backend offloads image processing operations to the GPU by
-generating GLSL-based fragment shaders.
-
-Compared to other GPU-based processing options such as CUDA and OpenCL, OpenGL
-has two main advantages: it is available on basically every desktop computer and
-mobile device, and it is generally well supported across different hardware
-vendors.
-
-The main disadvantage of OpenGL as an image processing framework is that the
-computational capabilities of fragment shaders are quite restricted. In general,
-the processing model provided by OpenGL is most suitable for filters where each
-output pixel can be expressed as a simple function of the input pixels. This
-covers a wide range of interesting operations like point-wise filters and
-convolutions; but a few common image processing operations such as histograms or
-recursive filters are notoriously hard to express in GLSL.
-
-#### Writing OpenGL-Based Filters
-
-To enable code generation for OpenGL, include `opengl` in the target specifier
-passed to Halide. Since OpenGL shaders are limited in their computational power,
-you must also specify a CPU target for those parts of the filter that cannot or
-should not be computed on the GPU. Examples of valid target specifiers are
-
-```
-host-opengl
-x86-opengl-debug
-```
-
-Adding `debug`, as in the second example, adds additional logging output and is
-highly recommended during development.
-
-By default, filters compiled for OpenGL targets run completely on the CPU.
-Execution on the GPU must be enabled for individual Funcs by appropriate
-scheduling calls.
-
-GLSL fragment shaders implicitly iterate over two spatial dimensions x,y and the
-color channel. Due to the way color channels handled in GLSL, only filters for
-which the color index is a compile-time constant can be scheduled. The main
-consequence is that the range of color variables must be explicitly specified
-for both input and output buffers before scheduling:
-
-```
-ImageParam input;
-Func f;
-Var x, y, c;
-f(x, y, c) = ...;
-
-input.set_bounds(2, 0, 3);   // specify color range for input
-f.bound(c, 0, 3);            // and output
-f.glsl(x, y, c);
-```
-
-#### JIT Compilation
-
-For JIT compilation Halide attempts to load the system libraries for opengl and
-creates a new context to use for each module. Windows is not yet supported.
-
-Examples for JIT execution of OpenGL-based filters can be found in test/opengl.
-
-#### AOT Compilation
-
-When AOT (ahead-of-time) compilation is used, Halide generates OpenGL-enabled
-object files that can be linked to and called from a host application. In
-general, this is fairly straightforward, but a few things must be taken care of.
-
-On Linux, OS X, and Android, Halide creates its own OpenGL context unless the
-current thread already has an active context. On other platforms you have to
-link implementations of the following two functions with your Halide code:
-
-```
-extern "C" int halide_opengl_create_context(void *) {
-    return 0;  // if successful
-}
-
-extern "C" void *halide_opengl_get_proc_addr(void *, const char *name) {
-    ...
-}
-```
-
-Halide allocates and deletes textures as necessary. Applications may manage the
-textures by hand by setting the `halide_buffer_t::device` field; this is most
-useful for reusing image data that is already stored in textures. Some
-rudimentary checks are performed to ensure that externally allocated textures
-have the correct format, but in general that's the responsibility of the
-application.
-
-It is possible to let render directly to the current framebuffer; to do this,
-set the `dev` field of the output buffer to the value returned by
-`halide_opengl_output_client_bound`. The example in apps/HelloAndroidGL
-demonstrates this technique.
-
-Some operating systems can delete the OpenGL context of suspended applications.
-If this happens, Halide needs to re-initialize itself with the new context after
-the application resumes. Call `halide_opengl_context_lost` to reset Halide's
-OpenGL state after this has happened.
-
-#### Limitations
-
-The current implementation of the OpenGL backend targets the common subset of
-OpenGL 2.0 and OpenGL ES 2.0 which is widely available on both mobile devices
-and traditional computers. As a consequence, only a subset of the Halide
-language can be scheduled to run using OpenGL. Some important limitations are:
-
-- Reductions cannot be implemented in GLSL and must be run on the CPU.
-
-- OpenGL ES 2.0 only supports uint8 buffers.
-
-  Support for floating point texture is available, but requires OpenGL (ES) 3.0
-  or the texture_float extension, which may not work on all mobile devices.
-
-- OpenGL ES 2.0 has very limited support for integer arithmetic. For maximum
-  compatibility, consider doing all computations using floating point, even when
-  using integer textures.
-
-- Only 2D images with 3 or 4 color channels can be scheduled. Images with one or
-  two channels require OpenGL (ES) 3.0 or the texture_rg extension.
-
-- Not all builtin functions provided by Halide are currently supported, for
-  example `fast_log`, `fast_exp`, `fast_pow`, `reinterpret`, bit operations,
-  `random_float`, `random_int` cannot be used in GLSL code.
-
-The maximum texture size in OpenGL is `GL_MAX_TEXTURE_SIZE`, which is often
-smaller than the image of interest; on mobile devices, for example,
-`GL_MAX_TEXTURE_SIZE` is commonly 2048. Tiling must be used to process larger
-images.
-
-Planned features:
-
-- Support for half-float textures and arithmetic
-
-- Support for integer textures and arithmetic
-
-(Note that OpenGL Compute Shaders are supported with a separate OpenGLCompute
-backend.)
+TODO(https://github.com/halide/Halide/issues/5633): rewrite this section for the OpenGLCompute backend, which is staying (only the OpenGL/GLSL backend has been removed).
 
 # Halide for Hexagon HVX
 
diff --git a/README_cmake.md b/README_cmake.md
index d078c41775b0..421d93007bee 100644
--- a/README_cmake.md
+++ b/README_cmake.md
@@ -392,7 +392,6 @@ apply when `WITH_TESTS=ON`:
 | `WITH_TEST_ERROR`         | `ON`    | enable the expected-error tests   |
 | `WITH_TEST_WARNING`       | `ON`    | enable the expected-warning tests |
 | `WITH_TEST_PERFORMANCE`   | `ON`    | enable performance testing        |
-| `WITH_TEST_OPENGL`        | `OFF`   | enable the OpenGL tests           |
 | `WITH_TEST_GENERATOR`     | `ON`    | enable the AOT generator tests    |
 
 The following options enable/disable various LLVM backends (they correspond to
@@ -416,7 +415,6 @@ The following options enable/disable various Halide-specific backends:
 | Option                | Default | Description                            |
 | --------------------- | ------- | -------------------------------------- |
 | `TARGET_OPENCL`       | `ON`    | Enable the OpenCL-C backend            |
-| `TARGET_OPENGL`       | `ON`    | Enable the OpenGL/GLSL backend         |
 | `TARGET_METAL`        | `ON`    | Enable the Metal backend               |
 | `TARGET_D3D12COMPUTE` | `ON`    | Enable the Direct3D 12 Compute backend |
 
@@ -466,6 +464,8 @@ If the CMake version is lower than 3.18, the deprecated [`FindCUDA`][findcuda]
 module will be used instead. It reads the variable `CUDA_TOOLKIT_ROOT_DIR`
 instead of `CUDAToolkit_ROOT` above.
 
+TODO(https://github.com/halide/Halide/issues/5633): update the following paragraph for OpenGLCompute, which needs some (but possibly not all) of the OpenGL/X11 settings it describes.
+
 When targeting OpenGL, the [`FindOpenGL`][findopengl] and [`FindX11`][findx11]
 modules will be used to link AOT generated binaries. These modules can be
 overridden by setting the following variables:
diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt
index 7e0ed08e4763..3effa0221125 100644
--- a/apps/CMakeLists.txt
+++ b/apps/CMakeLists.txt
@@ -9,7 +9,6 @@ enable_testing()
 
 # add_subdirectory(HelloAndroid)  # TODO(#5374): missing CMake build
 # add_subdirectory(HelloAndroidCamera2)  # TODO(#5374): missing CMake build
-# add_subdirectory(HelloAndroidGL)  # TODO(#5374): missing CMake build
 # add_subdirectory(HelloMatlab)  # TODO(#5374): missing CMake build
 # add_subdirectory(HelloPyTorch)  # TODO(#5374): missing CMake build
 # add_subdirectory(HelloWasm)  # TODO(#5374): missing CMake build
@@ -24,7 +23,6 @@ add_subdirectory(conv_layer)
 add_subdirectory(cuda_mat_mul)
 add_subdirectory(depthwise_separable_conv)
 add_subdirectory(fft)
-add_subdirectory(glsl)
 add_subdirectory(harris)
 # add_subdirectory(hexagon_benchmarks)  # TODO(#5374): missing CMake build
 # add_subdirectory(hexagon_dma)  # TODO(#5374): missing CMake build
@@ -39,7 +37,6 @@ add_subdirectory(max_filter)
 add_subdirectory(nl_means)
 # add_subdirectory(nn_ops)  # TODO(#5374): missing CMake build
 # add_subdirectory(onnx)  # TODO(#5374): missing CMake build
-# add_subdirectory(opengl_demo)  # TODO(#5374): missing CMake build
 # add_subdirectory(openglcompute)  # TODO(#5374): missing CMake build
 add_subdirectory(resize)
 # add_subdirectory(resnet_50)  # TODO(#5374): missing CMake build
diff --git a/apps/HelloAndroidGL/AndroidManifest.xml b/apps/HelloAndroidGL/AndroidManifest.xml
deleted file mode 100644
index de292f319f7f..000000000000
--- a/apps/HelloAndroidGL/AndroidManifest.xml
+++ /dev/null
@@ -1,21 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<manifest xmlns:android="http://schemas.android.com/apk/res/android"
-          package="org.halide_lang.hellohalidegl"
-          android:versionCode="1"
-          android:versionName="1.0">
-  <application android:label="@string/app_name" android:icon="@drawable/ic_launcher">
-    <activity android:name=".HelloHalideGL"
-              android:label="@string/app_name"
-              android:screenOrientation="landscape"
-              android:theme="@android:style/Theme.Black.NoTitleBar.Fullscreen">
-
-      <intent-filter>
-        <action android:name="android.intent.action.MAIN" />
-        <category android:name="android.intent.category.LAUNCHER" />
-      </intent-filter>
-    </activity>
-  </application>
-
-  <uses-sdk android:minSdkVersion="17" />
-  <uses-feature android:glEsVersion="0x00020000" android:required="true" />
-</manifest>
diff --git a/apps/HelloAndroidGL/ant.properties b/apps/HelloAndroidGL/ant.properties
deleted file mode 100644
index b0971e891efd..000000000000
--- a/apps/HelloAndroidGL/ant.properties
+++ /dev/null
@@ -1,17 +0,0 @@
-# This file is used to override default values used by the Ant build system.
-#
-# This file must be checked into Version Control Systems, as it is
-# integral to the build system of your project.
-
-# This file is only used by the Ant script.
-
-# You can use this to override default values such as
-#  'source.dir' for the location of your java source folder and
-#  'out.dir' for the location of your output folder.
-
-# You can also use it define how the release builds are signed by declaring
-# the following properties:
-#  'key.store' for the location of your keystore and
-#  'key.alias' for the name of the key to use.
-# The password will be asked during the build when you use the 'release' target.
-
diff --git a/apps/HelloAndroidGL/build.sh b/apps/HelloAndroidGL/build.sh
deleted file mode 100755
index d9b1f395dc12..000000000000
--- a/apps/HelloAndroidGL/build.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-set -e
-android update project -p . --target android-17
-cd jni
-c++ -std=c++11 halide_gl_filter.cpp -L ../../../bin -lHalide -I ../../../include -ldl -lpthread -lz
-HL_TARGET=arm-32-android-opengl-debug DYLD_LIBRARY_PATH=../../../bin LD_LIBRARY_PATH=../../../bin ./a.out
-cd ..
-pwd
-ndk-build
-ant debug
-adb install -r bin/HelloAndroidGL-debug.apk
-adb logcat
diff --git a/apps/HelloAndroidGL/build.xml b/apps/HelloAndroidGL/build.xml
deleted file mode 100644
index 1e79c7ee52fa..000000000000
--- a/apps/HelloAndroidGL/build.xml
+++ /dev/null
@@ -1,92 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project name="HelloAndroidGL" default="help">
-
-    <!-- The local.properties file is created and updated by the 'android' tool.
-         It contains the path to the SDK. It should *NOT* be checked into
-         Version Control Systems. -->
-    <property file="local.properties" />
-
-    <!-- The ant.properties file can be created by you. It is only edited by the
-         'android' tool to add properties to it.
-         This is the place to change some Ant specific build properties.
-         Here are some properties you may want to change/update:
-
-         source.dir
-             The name of the source directory. Default is 'src'.
-         out.dir
-             The name of the output directory. Default is 'bin'.
-
-         For other overridable properties, look at the beginning of the rules
-         files in the SDK, at tools/ant/build.xml
-
-         Properties related to the SDK location or the project target should
-         be updated using the 'android' tool with the 'update' action.
-
-         This file is an integral part of the build system for your
-         application and should be checked into Version Control Systems.
-
-         -->
-    <property file="ant.properties" />
-
-    <!-- if sdk.dir was not set from one of the property file, then
-         get it from the ANDROID_HOME env var.
-         This must be done before we load project.properties since
-         the proguard config can use sdk.dir -->
-    <property environment="env" />
-    <condition property="sdk.dir" value="${env.ANDROID_HOME}">
-        <isset property="env.ANDROID_HOME" />
-    </condition>
-
-    <!-- The project.properties file is created and updated by the 'android'
-         tool, as well as ADT.
-
-         This contains project specific properties such as project target, and library
-         dependencies. Lower level build properties are stored in ant.properties
-         (or in .classpath for Eclipse projects).
-
-         This file is an integral part of the build system for your
-         application and should be checked into Version Control Systems. -->
-    <loadproperties srcFile="project.properties" />
-
-    <!-- quick check on sdk.dir -->
-    <fail
-            message="sdk.dir is missing. Make sure to generate local.properties using 'android update project' or to inject it through the ANDROID_HOME environment variable."
-            unless="sdk.dir"
-    />
-
-    <!--
-        Import per project custom build rules if present at the root of the project.
-        This is the place to put custom intermediary targets such as:
-            -pre-build
-            -pre-compile
-            -post-compile (This is typically used for code obfuscation.
-                           Compiled code location: ${out.classes.absolute.dir}
-                           If this is not done in place, override ${out.dex.input.absolute.dir})
-            -post-package
-            -post-build
-            -pre-clean
-    -->
-    <import file="custom_rules.xml" optional="true" />
-
-    <!-- Import the actual build file.
-
-         To customize existing targets, there are two options:
-         - Customize only one target:
-             - copy/paste the target into this file, *before* the
-               <import> task.
-             - customize it to your needs.
-         - Customize the whole content of build.xml
-             - copy/paste the content of the rules files (minus the top node)
-               into this file, replacing the <import> task.
-             - customize to your needs.
-
-         ***********************
-         ****** IMPORTANT ******
-         ***********************
-         In all cases you must update the value of version-tag below to read 'custom' instead of an integer,
-         in order to avoid having your file be overridden by tools such as "android update project"
-    -->
-    <!-- version-tag: 1 -->
-    <import file="${sdk.dir}/tools/ant/build.xml" />
-
-</project>
diff --git a/apps/HelloAndroidGL/jni/Android.mk b/apps/HelloAndroidGL/jni/Android.mk
deleted file mode 100644
index c30cec7bf54b..000000000000
--- a/apps/HelloAndroidGL/jni/Android.mk
+++ /dev/null
@@ -1,15 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-
-include $(CLEAR_VARS)
-
-LOCAL_MODULE    := android_halide_gl_native
-LOCAL_ARM_MODE  := arm
-LOCAL_SRC_FILES := android_halide_gl_native.cpp
-LOCAL_LDFLAGS   := -Ljni
-LOCAL_LDLIBS    := -lm -llog -landroid -lEGL -lGLESv2 jni/halide_gl_filter.o
-LOCAL_STATIC_LIBRARIES := android_native_app_glue
-LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../include
-
-include $(BUILD_SHARED_LIBRARY)
-
-$(call import-module,android/native_app_glue)
diff --git a/apps/HelloAndroidGL/jni/Application.mk b/apps/HelloAndroidGL/jni/Application.mk
deleted file mode 100644
index 56005dabf161..000000000000
--- a/apps/HelloAndroidGL/jni/Application.mk
+++ /dev/null
@@ -1,3 +0,0 @@
-# The ARMv7 is significanly faster due to the use of the hardware FPU
-APP_ABI := armeabi-v7a
-APP_PLATFORM := android-17
diff --git a/apps/HelloAndroidGL/jni/android_halide_gl_native.cpp b/apps/HelloAndroidGL/jni/android_halide_gl_native.cpp
deleted file mode 100644
index 20e3de6bfe56..000000000000
--- a/apps/HelloAndroidGL/jni/android_halide_gl_native.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-#include <android/bitmap.h>
-#include <android/log.h>
-#include <android/native_window_jni.h>
-#include <jni.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include "HalideBuffer.h"
-#include "HalideRuntimeOpenGL.h"
-#include "halide_gl_filter.h"
-
-#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, "halide_native", __VA_ARGS__)
-#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, "halide_native", __VA_ARGS__)
-
-void *const user_context = NULL;
-
-extern "C" JNIEXPORT void JNICALL Java_org_halide_1lang_hellohalidegl_HalideGLView_processTextureHalide(
-    JNIEnv *env, jobject obj, jint dst, jint width, jint height) {
-
-    auto dstBuf = Halide::Runtime::Buffer<uint8_t>::make_interleaved(NULL, width, height, 4);
-    // If dst == 0, let Halide render directly to the current render target.
-    if (dst == 0) {
-        int result = halide_opengl_wrap_render_target(user_context, dstBuf);
-        if (result != 0) {
-            halide_error(user_context, "halide_opengl_wrap_render_target failed");
-        }
-    } else {
-        int result = halide_opengl_wrap_texture(user_context, dstBuf, dst);
-        if (result != 0) {
-            halide_error(user_context, "halide_opengl_wrap_texture failed");
-        }
-    }
-
-    static float time = 0.0f;
-    if (int err = halide_gl_filter(time, dstBuf)) {
-        LOGD("Halide filter failed with error code %d\n", err);
-    }
-    time += 1.0f / 16.0f;
-
-    uintptr_t detached = halide_opengl_detach_texture(user_context, dstBuf);
-    if (detached != dst) {
-        halide_error(user_context, "halide_opengl_detach_texture failed");
-    }
-}
-
-extern "C" JNIEXPORT void JNICALL Java_org_halide_1lang_hellohalidegl_HalideGLView_halideContextLost(
-    JNIEnv *env, jobject obj) {
-
-    halide_opengl_context_lost(NULL);
-}
diff --git a/apps/HelloAndroidGL/jni/halide_gl_filter.cpp b/apps/HelloAndroidGL/jni/halide_gl_filter.cpp
deleted file mode 100644
index 15e949312539..000000000000
--- a/apps/HelloAndroidGL/jni/halide_gl_filter.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-#include "Halide.h"
-
-using namespace Halide;
-
-int main(int argc, char **argv) {
-    Param<float> time;
-
-    const float pi = 3.1415926536;
-
-    Var x, y, c;
-    Func result;
-
-    Expr kx, ky;
-    Expr xx, yy;
-    kx = x / 150.0f;
-    ky = y / 150.0f;
-
-    xx = kx + sin(time / 3.0f);
-    yy = ky + sin(time / 2.0f);
-
-    Expr angle;
-    angle = 2 * pi * sin(time / 20.0f);
-    kx = kx * cos(angle) - ky * sin(angle);
-    ky = kx * sin(angle) + ky * cos(angle);
-
-    Expr v = 0.0f;
-    v += sin((ky + time) / 2.0f);
-    v += sin((kx + ky + time) / 2.0f);
-    v += sin(sqrt(xx * xx + yy * yy + 1.0f) + time);
-
-    result(x, y, c) = cast<uint8_t>(selecy_by_index(c, {32, cos(pi * v), sin(pi * v)}) * 80 + (255 - 80));
-
-    result.output_buffer().set_stride(0, 4);
-    result.bound(c, 0, 4);
-    result.glsl(x, y, c);
-
-    result.compile_to_file("halide_gl_filter", {time}, "halide_gl_filter");
-
-    return 0;
-}
diff --git a/apps/HelloAndroidGL/project.properties b/apps/HelloAndroidGL/project.properties
deleted file mode 100644
index a3ee5ab64f5e..000000000000
--- a/apps/HelloAndroidGL/project.properties
+++ /dev/null
@@ -1,14 +0,0 @@
-# This file is automatically generated by Android Tools.
-# Do not modify this file -- YOUR CHANGES WILL BE ERASED!
-#
-# This file must be checked in Version Control Systems.
-#
-# To customize properties used by the Ant build system edit
-# "ant.properties", and override values to adapt the script to your
-# project structure.
-#
-# To enable ProGuard to shrink and obfuscate your code, uncomment this (available properties: sdk.dir, user.home):
-#proguard.config=${sdk.dir}/tools/proguard/proguard-android.txt:proguard-project.txt
-
-# Project target.
-target=android-17
diff --git a/apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png b/apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png
deleted file mode 100644
index 96a442e5b8e9..000000000000
Binary files a/apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png and /dev/null differ
diff --git a/apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png b/apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png
deleted file mode 100644
index 99238729d875..000000000000
Binary files a/apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png and /dev/null differ
diff --git a/apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png b/apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png
deleted file mode 100644
index 359047dfa4ed..000000000000
Binary files a/apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png and /dev/null differ
diff --git a/apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png b/apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png
deleted file mode 100644
index 71c6d760f051..000000000000
Binary files a/apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png and /dev/null differ
diff --git a/apps/HelloAndroidGL/res/layout/main.xml b/apps/HelloAndroidGL/res/layout/main.xml
deleted file mode 100644
index 5a8da6d73556..000000000000
--- a/apps/HelloAndroidGL/res/layout/main.xml
+++ /dev/null
@@ -1,15 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
-    android:orientation="horizontal"
-    android:layout_width="fill_parent"
-    android:layout_height="fill_parent"
-    >
-
-  <FrameLayout
-      android:id="@+id/camera_preview"
-      android:layout_width="fill_parent"
-      android:layout_height="fill_parent"
-      android:layout_weight="1"
-      />
-
-</LinearLayout>
\ No newline at end of file
diff --git a/apps/HelloAndroidGL/res/values/strings.xml b/apps/HelloAndroidGL/res/values/strings.xml
deleted file mode 100644
index 2673566b97f0..000000000000
--- a/apps/HelloAndroidGL/res/values/strings.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<resources>
-    <string name="app_name">Halide GL Demo</string>
-</resources>
diff --git a/apps/HelloAndroidGL/src/org/halide_lang/hellohalidegl/HelloHalideGL.java b/apps/HelloAndroidGL/src/org/halide_lang/hellohalidegl/HelloHalideGL.java
deleted file mode 100644
index 78843d3d498f..000000000000
--- a/apps/HelloAndroidGL/src/org/halide_lang/hellohalidegl/HelloHalideGL.java
+++ /dev/null
@@ -1,208 +0,0 @@
-package org.halide_lang.hellohalidegl;
-
-import android.app.Activity;
-import android.content.Context;
-import android.os.Bundle;
-import android.hardware.Camera;
-import android.util.Log;
-import android.widget.FrameLayout;
-import android.view.SurfaceView;
-import android.view.Surface;
-import android.graphics.Bitmap;
-import android.graphics.Canvas;
-import android.opengl.GLSurfaceView;
-import javax.microedition.khronos.egl.EGLConfig;
-import javax.microedition.khronos.opengles.GL10;
-import java.nio.ByteBuffer;
-import java.nio.FloatBuffer;
-import java.nio.ByteOrder;
-
-class HalideGLView extends GLSurfaceView {
-    static {
-        System.loadLibrary("android_halide_gl_native");
-    }
-    private static native void processTextureHalide(int dst, int width, int height);
-    private static native void halideContextLost();
-
-    private static final android.opengl.GLES20 gl = new android.opengl.GLES20();
-
-    // If set to true, let Halide render directly to the framebuffer.
-    // Otherwise, Halide renders to a texture which we then blit to the
-    // screen.
-    private boolean halideDirectRender = true;
-
-    HalideGLView(Context context) {
-        super(context);
-        setEGLContextClientVersion(2);
-        setPreserveEGLContextOnPause(true);
-        setDebugFlags(DEBUG_CHECK_GL_ERROR);
-        setRenderer(new MyRenderer());
-    }
-
-    class MyRenderer implements GLSurfaceView.Renderer {
-        private int output;
-        private int surfaceWidth, surfaceHeight;
-        private int program;
-
-        private FloatBuffer quad_vertices;
-
-        final String vs_source =
-            "attribute vec2 position;\n" +
-            "varying vec2 texpos;\n" +
-            "void main(void) {\n" +
-            "  gl_Position = vec4(position, 0.0, 1.0);\n" +
-            "  texpos = position * 0.5 + 0.5;\n" +
-            "}\n";
-        final String fs_source =
-            "uniform sampler2D tex;\n" +
-            "varying highp vec2 texpos;\n" +
-            "void main(void) {\n" +
-            "  gl_FragColor = texture2D(tex, texpos.xy);\n" +
-            "}\n";
-
-        public MyRenderer() {
-            final float[] vertices = new float[] {
-                -1.0f, -1.0f,
-                1.0f, -1.0f,
-                -1.0f, 1.0f,
-                1.0f, 1.0f,
-            };
-            quad_vertices =
-                ByteBuffer.allocateDirect(4 * vertices.length)
-                .order(ByteOrder.nativeOrder())
-                .asFloatBuffer();
-            quad_vertices.put(vertices);
-        }
-
-        /** Compile a single vertex or fragment shader. */
-        private int compileShader(int type, String source) {
-            int shader = gl.glCreateShader(type);
-            gl.glShaderSource(shader, source);
-            gl.glCompileShader(shader);
-            int[] status = new int[1];
-            gl.glGetShaderiv(shader, gl.GL_COMPILE_STATUS, status, 0);
-            if (status[0] == 0) {
-                String log = gl.glGetShaderInfoLog(shader);
-                Log.e(HelloHalideGL.TAG, log);
-                throw new RuntimeException("Compiling shader failed");
-            }
-            return shader;
-        }
-
-        /** Compile and link simple vertex and fragment shader for rendering
-         * 2D graphics. */
-        private void prepareShaders() {
-            int vertex_shader = compileShader(gl.GL_VERTEX_SHADER,
-                                              vs_source);
-            int fragment_shader = compileShader(gl.GL_FRAGMENT_SHADER,
-                                                fs_source);
-
-            program = gl.glCreateProgram();
-            if (program == 0) {
-                throw new RuntimeException("Invalid GLSL program");
-            }
-            gl.glAttachShader(program, vertex_shader);
-            gl.glAttachShader(program, fragment_shader);
-            gl.glBindAttribLocation(program, 0, "position");
-            gl.glLinkProgram(program);
-
-            int[] status = new int[1];
-            gl.glGetProgramiv(program, gl.GL_LINK_STATUS, status, 0);
-            if (status[0] == 0) {
-                String log = gl.glGetProgramInfoLog(program);
-                Log.e(HelloHalideGL.TAG, log);
-                throw new RuntimeException("Linking GLSL program failed");
-            }
-        }
-
-        private int createTexture(int w, int h) {
-            int[] id = new int[1];
-            gl.glGenTextures(1, id, 0);
-            gl.glBindTexture(gl.GL_TEXTURE_2D, id[0]);
-            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_NEAREST);
-            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST);
-
-            ByteBuffer buf = ByteBuffer.allocate(w * h * 4);
-            gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA, w, h, 0,
-                            gl.GL_RGBA, gl.GL_UNSIGNED_BYTE, buf);
-            return id[0];
-        }
-
-        @Override
-        public void onSurfaceCreated(GL10 unused, EGLConfig config) {
-            Log.d("Hello", "onSurfaceCreated");
-            prepareShaders();
-        }
-
-        @Override
-        public void onSurfaceChanged(GL10 unused, int w, int h) {
-            halideContextLost();
-            int[] textures = { output };
-            gl.glDeleteTextures(1, textures, 0);
-            output = createTexture(w, h);
-            surfaceWidth = w;
-            surfaceHeight = h;
-        }
-
-        @Override
-        public void onDrawFrame(GL10 unused) {
-            Log.d("Hello", "onDrawFrame");
-
-            if (halideDirectRender) {
-                // Call Halide filter; 0 as the texture ID in this case
-                // indicates render to framebuffer.
-                processTextureHalide(0, surfaceWidth, surfaceHeight);
-            } else {
-                // Call Halide filter
-                processTextureHalide(output, surfaceWidth, surfaceHeight);
-
-                // Draw result to screen
-                gl.glViewport(0, 0, surfaceWidth, surfaceHeight);
-
-                gl.glUseProgram(program);
-
-                int positionLoc = gl.glGetAttribLocation(program, "position");
-                quad_vertices.position(0);
-                gl.glVertexAttribPointer(positionLoc, 2, gl.GL_FLOAT, false, 0, quad_vertices);
-                gl.glEnableVertexAttribArray(positionLoc);
-
-                int texLoc = gl.glGetUniformLocation(program, "tex");
-                gl.glUniform1i(texLoc, 0);
-                gl.glActiveTexture(gl.GL_TEXTURE0);
-                gl.glBindTexture(gl.GL_TEXTURE_2D, output);
-
-                gl.glDrawArrays(gl.GL_TRIANGLE_STRIP, 0, 4);
-
-                gl.glDisableVertexAttribArray(positionLoc);
-                gl.glBindTexture(gl.GL_TEXTURE_2D, 0);
-                gl.glUseProgram(0);
-                gl.glDisableVertexAttribArray(0);
-            }
-        }
-    }
-}
-
-public class HelloHalideGL extends Activity {
-    static final String TAG = "HelloHalideGL";
-
-    private GLSurfaceView view;
-
-    @Override
-    public void onCreate(Bundle b) {
-        super.onCreate(b);
-        view = new HalideGLView(this);
-        setContentView(view);
-    }
-
-    @Override
-    public void onResume() {
-        super.onResume();
-        view.onResume();
-    }
-
-    @Override
-    public void onPause() {
-        super.onPause();
-        view.onPause();
-    }
-}
diff --git a/apps/bgu/Makefile b/apps/bgu/Makefile
index a05070a3039c..ab8f69baaae5 100644
--- a/apps/bgu/Makefile
+++ b/apps/bgu/Makefile
@@ -24,7 +24,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/bgu.generator
 
 $(BIN)/%/filter: filter.cpp $(BIN)/%/bgu.a $(BIN)/%/bgu_auto_schedule.a $(BIN)/%/runtime.a
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)
+	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS)
 
 $(BIN)/%/out.png: $(BIN)/%/filter
 	$< ../images/rgb.png $(BIN)/$*/out.png
diff --git a/apps/glsl/CMakeLists.txt b/apps/glsl/CMakeLists.txt
deleted file mode 100644
index e9a8a5f13765..000000000000
--- a/apps/glsl/CMakeLists.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-if (WIN32)
-    # Halide OpenGL is broken on Windows.
-    return()
-endif ()
-
-cmake_minimum_required(VERSION 3.16)
-project(glsl)
-
-enable_testing()
-
-# Set up language settings
-set(CMAKE_CXX_STANDARD 11)
-set(CMAKE_CXX_STANDARD_REQUIRED YES)
-set(CMAKE_CXX_EXTENSIONS NO)
-
-# Find Halide
-find_package(Halide REQUIRED)
-
-find_package(OpenGL REQUIRED)
-set(opengl_features opengl)
-if (TARGET OpenGL::OpenGL AND TARGET OpenGL::EGL)
-    # EGL requires GLVND (which is found iff ::OpenGL is present)
-    list(APPEND opengl_features egl)
-endif ()
-
-# Generators
-add_executable(glsl_blur.generator halide_blur_glsl_generator.cpp)
-target_link_libraries(glsl_blur.generator PRIVATE Halide::Generator)
-
-add_executable(ycc.generator halide_ycc_glsl_generator.cpp)
-target_link_libraries(ycc.generator PRIVATE Halide::Generator)
-
-# Libraries
-add_halide_library(halide_blur_glsl FROM glsl_blur.generator FEATURES ${opengl_features} debug)
-add_halide_library(halide_ycc_glsl FROM ycc.generator FEATURES ${opengl_features} debug)
-
-# Final executable
-add_executable(opengl_test opengl_test.cpp)
-target_link_libraries(opengl_test PRIVATE halide_blur_glsl halide_ycc_glsl)
-
-add_test(NAME opengl_test COMMAND opengl_test)
diff --git a/apps/glsl/Makefile b/apps/glsl/Makefile
deleted file mode 100644
index dc12d94ae504..000000000000
--- a/apps/glsl/Makefile
+++ /dev/null
@@ -1,36 +0,0 @@
-include ../support/Makefile.inc
-
-# Note: using the -g flag in conjunction with the -debug Feature on OSX may
-# produce "failed to insert symbol" warnings at link time; this is annoying but harmless.
-CXXFLAGS += -g -O0
-
-all: $(BIN)/$(HL_TARGET)/opengl_test
-
-$(GENERATOR_BIN)/halide_blur_glsl.generator: halide_blur_glsl_generator.cpp $(GENERATOR_DEPS)
-	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) $(filter %.cpp,$^) -o $@ $(LIBHALIDE_LDFLAGS)
-
-$(BIN)/%/halide_blur_glsl.a: $(GENERATOR_BIN)/halide_blur_glsl.generator
-	@mkdir -p $(@D)
-	$^ -g halide_blur_glsl -e $(GENERATOR_OUTPUTS) -o $(@D) target=$*-opengl-debug
-
-$(GENERATOR_BIN)/halide_ycc_glsl.generator: halide_ycc_glsl_generator.cpp $(GENERATOR_DEPS)
-	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) $(filter %.cpp,$^) -o $@ $(LIBHALIDE_LDFLAGS)
-
-$(BIN)/%/halide_ycc_glsl.a: $(GENERATOR_BIN)/halide_ycc_glsl.generator
-	@mkdir -p $(@D)
-	$^ -g halide_ycc_glsl -e $(GENERATOR_OUTPUTS) -o $(@D) target=$*-opengl-debug
-
-$(BIN)/%/opengl_test: opengl_test.cpp $(BIN)/%/halide_blur_glsl.a $(BIN)/%/halide_ycc_glsl.a
-	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -I$(BIN)/$* $^ -o $@ $(LDFLAGS) -L$(TOP)/bin $(PLATFORM_OPENGL_LDFLAGS)
-
-run: $(BIN)/$(HL_TARGET)/opengl_test
-	LD_LIBRARY_PATH=../../bin $<
-
-test: run
-
-.PHONY: clean
-clean:
-	rm -rf $(BIN)
diff --git a/apps/glsl/halide_blur_glsl_generator.cpp b/apps/glsl/halide_blur_glsl_generator.cpp
deleted file mode 100644
index 1d9a2eae47dc..000000000000
--- a/apps/glsl/halide_blur_glsl_generator.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "Halide.h"
-
-namespace {
-
-class HalideBlurGLSL : public Halide::Generator<HalideBlurGLSL> {
-public:
-    Input<Buffer<uint8_t>> input8{"input8", 3};
-    Output<Buffer<uint8_t>> blur_filter{"blur_filter", 3};
-    void generate() {
-        assert(get_target().has_feature(Target::OpenGL));
-
-        Func blur_x("blur_x"), blur_y("blur_y");
-        Var x("x"), y("y"), c("c");
-
-        // The algorithm
-        Func input;
-        input(x, y, c) = cast<float>(input8(clamp(x, input8.dim(0).min(), input8.dim(0).max()),
-                                            clamp(y, input8.dim(1).min(), input8.dim(1).max()), c)) /
-                         255.f;
-        blur_x(x, y, c) = (input(x, y, c) + input(x + 1, y, c) + input(x + 2, y, c)) / 3;
-        blur_y(x, y, c) = (blur_x(x, y, c) + blur_x(x, y + 1, c) + blur_x(x, y + 2, c)) / 3;
-        blur_filter(x, y, c) = cast<uint8_t>(blur_y(x, y, c) * 255.f);
-
-        // Schedule for GLSL
-        input8.dim(2).set_bounds(0, 3);
-        blur_filter.bound(c, 0, 3);
-        blur_filter.glsl(x, y, c);
-    }
-};
-
-}  // namespace
-
-HALIDE_REGISTER_GENERATOR(HalideBlurGLSL, halide_blur_glsl)
diff --git a/apps/glsl/halide_ycc_glsl_generator.cpp b/apps/glsl/halide_ycc_glsl_generator.cpp
deleted file mode 100644
index 47c05e8f4f3a..000000000000
--- a/apps/glsl/halide_ycc_glsl_generator.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "Halide.h"
-
-namespace {
-
-class RgbToYcc : public Halide::Generator<RgbToYcc> {
-public:
-    Input<Buffer<uint8_t>> input8{"input8", 3};
-    Output<Buffer<uint8_t>> out{"out", 3};
-    void generate() {
-        assert(get_target().has_feature(Target::OpenGL));
-        Var x("x"), y("y"), c("c");
-
-        // The algorithm
-        Func input("input");
-        input(x, y, c) = cast<float>(input8(x, y, c)) / 255.0f;
-
-        Func Y("Y"), Cb("Cb"), Cr("Cr");
-        Y(x, y) = 16.f / 255.f + (0.257f * input(x, y, 0) +
-                                  0.504f * input(x, y, 1) +
-                                  0.098f * input(x, y, 2));
-        Cb(x, y) = 128.f / 255.f + (0.439f * input(x, y, 0) +
-                                    -0.368f * input(x, y, 1) +
-                                    -0.071f * input(x, y, 2));
-        Cr(x, y) = 128.f / 255.f + (-0.148f * input(x, y, 0) +
-                                    -0.291f * input(x, y, 1) +
-                                    0.439f * input(x, y, 2));
-        out(x, y, c) = cast<uint8_t>(
-            mux(c, {Y(x, y), Cb(x, y), Cr(x, y), 0.0f}) * 255.f);
-
-        // Schedule for GLSL
-        input8.dim(2).set_bounds(0, 3);
-        out.bound(c, 0, 3);
-        out.glsl(x, y, c);
-    }
-};
-
-}  // namespace
-
-HALIDE_REGISTER_GENERATOR(RgbToYcc, halide_ycc_glsl)
diff --git a/apps/glsl/opengl_test.cpp b/apps/glsl/opengl_test.cpp
deleted file mode 100644
index 161805887daa..000000000000
--- a/apps/glsl/opengl_test.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "HalideBuffer.h"
-#include "HalideRuntime.h"
-#include "HalideRuntimeOpenGL.h"
-
-using Halide::Runtime::Buffer;
-
-#include "halide_blur_glsl.h"
-#include "halide_ycc_glsl.h"
-
-void test_blur() {
-    const int W = 12, H = 32, C = 3;
-    Buffer<uint8_t> input(W, H, C);
-    Buffer<uint8_t> output(W, H, C);
-
-    fprintf(stderr, "test_blur\n");
-    halide_blur_glsl(input, output);
-    fprintf(stderr, "test_blur complete\n");
-}
-
-void test_ycc() {
-    const int W = 12, H = 32, C = 3;
-    Buffer<uint8_t> input(W, H, C);
-    Buffer<uint8_t> output(W, H, C);
-
-    fprintf(stderr, "test_ycc\n");
-    halide_ycc_glsl(input, output);
-    fprintf(stderr, "Ycc complete\n");
-}
-
-void test_device_sync() {
-    const int W = 12, H = 32, C = 3;
-    Buffer<uint8_t> temp(W, H, C);
-
-    temp.set_host_dirty();
-    int result = temp.copy_to_device(halide_opengl_device_interface());
-    if (result != 0) {
-        fprintf(stderr, "halide_device_malloc failed with return %d.\n", result);
-        abort();
-    } else {
-        result = temp.device_sync();
-        if (result != 0) {
-            fprintf(stderr, "halide_device_sync failed with return %d.\n", result);
-            abort();
-        } else {
-            fprintf(stderr, "Test device sync complete.\n");
-        }
-    }
-}
-
-int main(int argc, char *argv[]) {
-    test_blur();
-    test_ycc();
-    test_device_sync();
-}
diff --git a/apps/harris/Makefile b/apps/harris/Makefile
index 3fef49815a1c..713c11d0c2c7 100644
--- a/apps/harris/Makefile
+++ b/apps/harris/Makefile
@@ -22,7 +22,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/harris.generator
 
 $(BIN)/%/filter: filter.cpp $(BIN)/%/harris.a $(BIN)/%/harris_auto_schedule.a $(BIN)/%/runtime.a
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)
+	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS)
 
 $(BIN)/%/out.png: $(BIN)/%/filter
 	$< ../images/rgba.png $(BIN)/$*/out.png
diff --git a/apps/hist/Makefile b/apps/hist/Makefile
index 8ab3e5785407..5f4faa1b835a 100644
--- a/apps/hist/Makefile
+++ b/apps/hist/Makefile
@@ -24,7 +24,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/hist.generator
 
 $(BIN)/%/filter: filter.cpp $(BIN)/%/hist.a $(BIN)/%/hist_auto_schedule.a $(BIN)/%/runtime.a
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)
+	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS)
 
 $(BIN)/%/out.png: $(BIN)/%/filter
 	$< ../images/rgba.png $(BIN)/$*/out.png
diff --git a/apps/iir_blur/Makefile b/apps/iir_blur/Makefile
index d195ffa7caf5..8c9983c8fa14 100644
--- a/apps/iir_blur/Makefile
+++ b/apps/iir_blur/Makefile
@@ -22,7 +22,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/iir_blur.generator
 
 $(BIN)/%/filter: filter.cpp $(BIN)/%/iir_blur.a $(BIN)/%/iir_blur_auto_schedule.a $(BIN)/%/runtime.a
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)
+	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS)
 
 $(BIN)/%/out.png: $(BIN)/%/filter
 	$< ../images/rgba.png $(BIN)/$*/out.png
diff --git a/apps/interpolate/Makefile b/apps/interpolate/Makefile
index fc5d8d7609f1..9d0d5c41f434 100644
--- a/apps/interpolate/Makefile
+++ b/apps/interpolate/Makefile
@@ -24,7 +24,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/interpolate.generator
 
 $(BIN)/%/filter: filter.cpp $(BIN)/%/interpolate.a $(BIN)/%/interpolate_auto_schedule.a $(BIN)/%/runtime.a
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)
+	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS)
 
 $(BIN)/%/out.png: $(BIN)/%/filter
 	$< ../images/rgba.png $(BIN)/$*/out.png
diff --git a/apps/lens_blur/Makefile b/apps/lens_blur/Makefile
index ea403c1e5bcf..8ede6b797ffe 100644
--- a/apps/lens_blur/Makefile
+++ b/apps/lens_blur/Makefile
@@ -19,7 +19,7 @@ $(BIN)/%/lens_blur_auto_schedule.a: $(GENERATOR_BIN)/lens_blur.generator
 
 $(BIN)/%/process: process.cpp $(BIN)/%/lens_blur.a $(BIN)/%/lens_blur_auto_schedule.a
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)
+	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS)
 
 $(BIN)/%/out.png: $(BIN)/%/process
 	@mkdir -p $(@D)
diff --git a/apps/local_laplacian/Makefile b/apps/local_laplacian/Makefile
index e9a9b69fe53f..21fa7bf74f6b 100644
--- a/apps/local_laplacian/Makefile
+++ b/apps/local_laplacian/Makefile
@@ -18,7 +18,7 @@ $(BIN)/%/local_laplacian_auto_schedule.a: $(GENERATOR_BIN)/local_laplacian.gener
 
 $(BIN)/%/process: process.cpp $(BIN)/%/local_laplacian.a $(BIN)/%/local_laplacian_auto_schedule.a
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)
+	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS)
 
 $(BIN)/%/out.png: $(BIN)/%/process
 	@mkdir -p $(@D)
@@ -30,7 +30,7 @@ $(BIN)/%/out.tiff: $(BIN)/%/process
 
 $(BIN)/%/process_viz: process.cpp $(BIN)/%-trace_all/local_laplacian.a
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -DNO_AUTO_SCHEDULE -I$(BIN)/$*-trace_all -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)
+	$(CXX) $(CXXFLAGS) -DNO_AUTO_SCHEDULE -I$(BIN)/$*-trace_all -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS)
 
 ../../bin/HalideTraceViz: ../../util/HalideTraceViz.cpp
 	$(MAKE) -C ../../ bin/HalideTraceViz
diff --git a/apps/max_filter/Makefile b/apps/max_filter/Makefile
index 6fcd9a59748c..bd755774b2f5 100644
--- a/apps/max_filter/Makefile
+++ b/apps/max_filter/Makefile
@@ -24,7 +24,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/max_filter.generator
 
 $(BIN)/%/filter: filter.cpp $(BIN)/%/max_filter.a $(BIN)/%/max_filter_auto_schedule.a $(BIN)/%/runtime.a
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)
+	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS)
 
 $(BIN)/%/out.png: $(BIN)/%/filter
 	$< ../images/rgba.png $(BIN)/$*/out.png
diff --git a/apps/nl_means/Makefile b/apps/nl_means/Makefile
index 13c9290cd3a2..2c7fecdccc47 100644
--- a/apps/nl_means/Makefile
+++ b/apps/nl_means/Makefile
@@ -18,7 +18,7 @@ $(BIN)/%/nl_means_auto_schedule.a: $(GENERATOR_BIN)/nl_means.generator
 
 $(BIN)/%/process: process.cpp $(BIN)/%/nl_means.a $(BIN)/%/nl_means_auto_schedule.a
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)
+	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS)
 
 $(BIN)/%/out.png: $(BIN)/%/process
 	@mkdir -p $(@D)
diff --git a/apps/opengl_demo/Makefile b/apps/opengl_demo/Makefile
deleted file mode 100644
index 5d6bc2bb23e7..000000000000
--- a/apps/opengl_demo/Makefile
+++ /dev/null
@@ -1,102 +0,0 @@
-#
-# This could be more DRY using some Makefile magic, but for the example
-# app will try to maximize clarity by making most rules explicit
-#
-
-# Where to find Halide.
-#
-# If you are building this demo using Halide installed systemwide (e.g. on
-# OS X installed via homebrew), you can set:
-#
-#  HALIDE_TOOLS_DIR = /usr/local/share/halide/tools
-#  HALIDE_LIB_PATH =
-#  HALIDE_INC_PATH =
-#
-# These settings are for building within the Halide source tree:
-HALIDE_TOOLS_DIR = ../../tools
-HALIDE_LIB_PATH  = -L ../../bin
-HALIDE_INC_PATH  = -I ../../include
-HL_TARGET ?= host
-
-# Platform-specific settings.
-#
-UNAME = $(shell uname)
-
-ifeq ($(UNAME),Darwin)
-
-  # These are for OS X:
-  DTX_FONT       = /Library/Fonts/Arial.ttf
-  OPENGL_LIBS    = -lglfw -framework OpenGL -framework GLUT
-  GENERATOR_LIBS = -lHalide -lz -lcurses
-
-else
-
-  # These are for Ubuntu Linux
-  DTX_FONT       = /usr/share/fonts/truetype/dejavu/DejaVuSans.ttf
-  OPENGL_LIBS    = `pkg-config glfw3 --libs` -lGL -lglut -lX11 -lpthread -ldl -lXxf86vm -lXinerama -lXcursor -lXrandr
-  GENERATOR_LIBS = -lHalide -lz -lcurses -Wl,--rpath=$(HALIDE_LIB_PATH)
-
-endif
-
-#
-# General build settings.  Should be good cross-platform.
-#
-MAIN_LIBS      = -lpng -ldrawtext $(OPENGL_LIBS)
-GENERATOR_LIBS = -lHalide -lz -lcurses
-CXXFLAGS       = -std=c++11 -g -DDTX_FONT=\"$(DTX_FONT)\" $(HALIDE_INC_PATH)
-
-# Output directory.
-BIN ?= bin
-
-.PHONY: run clean
-
-default:	run
-
-run:	$(BIN)/opengl_demo
-	$(BIN)/opengl_demo image.png
-
-clean:
-	rm -rf $(BIN)
-
-$(BIN)/opengl_demo: \
-    $(BIN)/main.o \
-    $(BIN)/layout.o \
-    $(BIN)/timer.o \
-    $(BIN)/glfw_helpers.o \
-    $(BIN)/opengl_helpers.o \
-    $(BIN)/png_helpers.o \
-    $(BIN)/sample_filter_cpu.o \
-    $(BIN)/sample_filter_opengl.o
-	$(CXX) $(CXXFLAGS) -o $@ $^ $(MAIN_LIBS)
-
-#
-# Explicitly list the dependency on the generated filter header files,
-# to ensure that they are created first.
-#
-$(BIN)/main.o: \
-    $(BIN)/sample_filter_cpu.h \
-    $(BIN)/sample_filter_opengl.h
-
-#
-# Rules to AOT-compile the halide filter for both CPU and OpenGL; the
-# compiled filters depend on $(BIN)/sample_filter.generator, which in turn
-# depends on the halide filter source in sample_filter.cpp
-#
-$(BIN)/sample_filter_cpu.o $(BIN)/sample_filter_cpu.h: $(BIN)/sample_filter.generator
-	LD_LIBRARY_PATH=../../bin $(BIN)/sample_filter.generator -g sample_filter -e object,c_header,stmt -o $(BIN) -f sample_filter_cpu target=$(HL_TARGET)
-
-$(BIN)/sample_filter_opengl.o $(BIN)/sample_filter_opengl.h: $(BIN)/sample_filter.generator
-	LD_LIBRARY_PATH=../../bin $(BIN)/sample_filter.generator -g sample_filter -e object,c_header,stmt -o $(BIN) -f sample_filter_opengl target=host-opengl-debug
-
-$(BIN)/sample_filter.generator: sample_filter_generator.cpp
-	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -o $@ $^ $(HALIDE_TOOLS_DIR)/GenGen.cpp $(HALIDE_LIB_PATH) $(GENERATOR_LIBS) $(HALIDE_SYSTEM_LIBS)
-
-#
-# Build in subdir using auto-dependency mechanism
-#
-$(BIN)/%.o: %.cpp
-	@mkdir -p $(@D)
-	$(CXX) -c $(CXXFLAGS) -I$(BIN) -MMD -MF $(patsubst %.o,%.d,$@) -o $@ $<
-
--include $(wildcard $(BIN)/*.d)
diff --git a/apps/opengl_demo/README.md b/apps/opengl_demo/README.md
deleted file mode 100644
index d2e7e8c91b88..000000000000
--- a/apps/opengl_demo/README.md
+++ /dev/null
@@ -1,104 +0,0 @@
-# Halide OpenGL Demo
-
-This demo contains an OpenGL desktop app that displays an input image side by
-side with the result of running a sample halide filter in three different ways:
-
-1. On the CPU, not using OpenGL.
-
-2. In OpenGL, with Halide transfering the input data from the host and
-   transferring the result data back to the host.
-
-3. In OpenGL, with Halide accepting input data that's in an OpenGL texture, and
-   leaving the result in an OpenGL texture.
-
-The display reports the timing for each. You should expect to see that #3 is
-fastest as it runs entirely on the GPU, while #2 is slowest because of the data
-transfer times.
-
-In this example we use AOT compilation twice: Once with `target=host` to produce
-the filter that runs on the CPU; and once with `target=host-opengl` to produce
-the filter that runs in OpenGL (which we call twice).
-
-The sample filter inverts the RGB channels of the input image.
-
-_This demo is known to work on OS X 10.11 and Ubuntu Linux 14.04 & 16.04.
-Windows has not yet been tested._
-
-### Instructions:
-
-Build and run the app by simply running `make`. It should open a window showing
-the input and the three (identical) filtering results. You can close the window
-and exit by pressing ESCAPE.
-
-The `Makefile` has variables to specify where to find Halide, how to link
-OpenGL, and so forth. You may need to tweak them for your platform.
-
-See the Makefile for details on how the filter gets AOT-compiled for CPU and
-OpenGL. Note that the `Makefile` actually specifies `target=host-opengl-debug`
-when AOT-compiling the opengl filter; that enables tracing of Halide's
-management of its OpenGL pipeline.
-
-#### Dependencies:
-
-This app depends on:
-
-- [GLFW 3](http://www.glfw.org)
-- [libpng](http://www.libpng.org)
-- [libdrawtext](http://nuclear.mutantstargoat.com/sw/libdrawtext/)
-
-On OS X, all three can be installed using [homebrew](http://brew.sh)
-
-```sh
-brew install glfw
-brew install libpng
-brew install libdrawtext
-```
-
-Halide itself can be installed on OS X via
-
-```sh
-brew tap halide/halide
-brew install halide
-```
-
-On Ubuntu Linux, everything but libdrawtext can be installed via system
-packages:
-
-```sh
-sudo apt-get install libglfw3-dev libx11-dev freeglut3-dev libfreetype6-dev libgl-dev libpng-dev
-```
-
-For libdrawtext, try this:
-
-```
-git clone https://github.com/jtsiomb/libdrawtext.git
-cd libdrawtext
-./configure
-make
-sudo make install
-```
-
-### Files:
-
-- `sample_filter.cpp`
-
-  The Halide filter generator source.
-
-- `main.cpp`
-
-  Contains all the Halide client code.
-
-  Note that it `#include`s the generated files `build/sample_filter_cpu.h` and
-  `build/sample_filter_opengl.h`.
-
-- `layout.{h,cpp}`
-
-  A minimal rendering framework for this example app.
-
-- `timer.{h,cpp}`
-
-  A minimal timing & reporting library.
-
-- `{glfw,opengl,png}_helpers.{cpp,h}`
-
-  Conveniences that hide the dirty details of the low-level packages.
diff --git a/apps/opengl_demo/glfw_helpers.cpp b/apps/opengl_demo/glfw_helpers.cpp
deleted file mode 100644
index 07752597e42d..000000000000
--- a/apps/opengl_demo/glfw_helpers.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-#include "glfw_helpers.h"
-#include <GLFW/glfw3.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-using namespace GlfwHelpers;
-
-static GLFWwindow *window;
-
-static void die(const char *msg) {
-    fprintf(stderr, "%s\n", msg);
-    exit(EXIT_FAILURE);
-}
-
-static void error_callback(int error, const char *description) {
-    die(description);
-}
-
-static void key_callback(GLFWwindow *window, int key, int scancode, int action, int mods) {
-    if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS)
-        glfwSetWindowShouldClose(window, GL_TRUE);
-}
-
-static bool first_focus = false;
-static void focus_callback(GLFWwindow *window, int) {
-    first_focus = true;
-}
-
-struct info GlfwHelpers::setup(int width, int height) {
-    struct info info;
-
-    glfwSetErrorCallback(error_callback);
-    if (!glfwInit()) die("couldn't init glfw!");
-    glfwWindowHint(GLFW_DOUBLEBUFFER, GL_FALSE);  // Single buffer mode, to avoid any doublebuffering timing issues
-    window = glfwCreateWindow(width, height, "opengl_halide_test", NULL, NULL);
-    if (!window) die("couldn't create window!");
-    glfwSetKeyCallback(window, key_callback);
-    glfwSetWindowFocusCallback(window, focus_callback);
-    glfwMakeContextCurrent(window);
-
-    while (!first_focus) {
-        glfwWaitEvents();
-    }
-
-    int framebuffer_width, framebuffer_height;
-    glfwGetFramebufferSize(window, &framebuffer_width, &framebuffer_height);
-    info.dpi_scale = float(framebuffer_width) / float(width);
-
-    return info;
-}
-
-void GlfwHelpers::terminate() {
-    while (!glfwWindowShouldClose(window)) {
-        glfwPollEvents();
-    }
-    glfwDestroyWindow(window);
-    glfwTerminate();
-}
-
-void GlfwHelpers::set_opengl_context() {
-    glfwMakeContextCurrent(window);
-}
diff --git a/apps/opengl_demo/glfw_helpers.h b/apps/opengl_demo/glfw_helpers.h
deleted file mode 100644
index cd3a0f05bdcf..000000000000
--- a/apps/opengl_demo/glfw_helpers.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _GLFW_HELPERS_H_
-#define _GLFW_HELPERS_H_
-
-namespace GlfwHelpers {
-
-struct info {
-    float dpi_scale;
-};
-
-struct info setup(int width, int height);
-void set_opengl_context();
-void terminate();
-}  // namespace GlfwHelpers
-
-#endif
diff --git a/apps/opengl_demo/image.png b/apps/opengl_demo/image.png
deleted file mode 100644
index c73df2103613..000000000000
Binary files a/apps/opengl_demo/image.png and /dev/null differ
diff --git a/apps/opengl_demo/layout.cpp b/apps/opengl_demo/layout.cpp
deleted file mode 100644
index be313c068814..000000000000
--- a/apps/opengl_demo/layout.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <string>
-
-#include "opengl_helpers.h"
-
-#include "layout.h"
-
-using namespace Layout;
-
-static const int border_sz = 10;  // pixels
-static const int header_sz = 20;  // pixels
-
-static struct info state;
-
-const struct info &Layout::setup(int image_width, int image_height) {
-    state.window_width = 2 * image_width + 3 * border_sz;
-    state.window_height = 2 * image_height + border_sz + 2 * header_sz;
-    return state;
-}
-
-void Layout::draw_texture(enum location location, GLuint texture_id, int width, int height, const std::string &label) {
-    int x0, x1, y0, y1, lx, ly;
-    switch (location) {  // set X coords
-    case LL:
-    case UL:
-        x0 = border_sz;
-        x1 = x0 + width;
-        lx = x0 + 2;
-        break;
-    case LR:
-    case UR:
-        x1 = state.window_width - border_sz;
-        x0 = x1 - width;
-        lx = x0 + 2;
-        break;
-    }
-    switch (location) {  // set Y coords
-    case LL:
-    case LR:
-        y0 = header_sz;
-        y1 = y0 + height;
-        ly = 6;
-        break;
-    case UL:
-    case UR:
-        y1 = state.window_height - header_sz;
-        y0 = y1 - height;
-        ly = y1 + 6;
-        break;
-    }
-
-    OpenGLHelpers::display_texture(texture_id, 2.0 * x0 / state.window_width - 1.0, 2.0 * x1 / state.window_width - 1.0, 2.0 * y0 / state.window_height - 1.0, 2.0 * y1 / state.window_height - 1.0);
-    OpenGLHelpers::draw_text(label, 2.0 * lx / state.window_width - 1.0, 2.0 * ly / state.window_height - 1.0);
-}
-
-void Layout::draw_image(enum location location, const uint8_t *data, int width, int height, const std::string &label) {
-    const auto texture_id = OpenGLHelpers::create_texture(width, height, data);
-    draw_texture(location, texture_id, width, height, label);
-    OpenGLHelpers::delete_texture(texture_id);
-}
diff --git a/apps/opengl_demo/layout.h b/apps/opengl_demo/layout.h
deleted file mode 100644
index be4947abc122..000000000000
--- a/apps/opengl_demo/layout.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef _LAYOUT_HELPERS_H_
-#define _LAYOUT_HELPERS_H_
-
-#if defined(__APPLE__)
-#include <OpenGL/gl.h>
-#else
-#include <GL/gl.h>
-#endif
-
-namespace Layout {
-
-enum location { UL,
-                UR,
-                LL,
-                LR };
-
-struct info {
-    int window_width;
-    int window_height;
-};
-
-const struct info &setup(int image_width, int image_height);
-
-void draw_image(enum location location, const uint8_t *data, int width, int height, const std::string &label);
-void draw_texture(enum location location, GLuint texture_id, int width, int height, const std::string &label);
-}  // namespace Layout
-
-#endif
diff --git a/apps/opengl_demo/main.cpp b/apps/opengl_demo/main.cpp
deleted file mode 100644
index 955f21ac812c..000000000000
--- a/apps/opengl_demo/main.cpp
+++ /dev/null
@@ -1,172 +0,0 @@
-#include <iostream>
-#include <stdlib.h>
-
-#include "glfw_helpers.h"
-#include "layout.h"
-#include "opengl_helpers.h"
-#include "png_helpers.h"
-#include "timer.h"
-
-#include "sample_filter_cpu.h"
-#include "sample_filter_opengl.h"
-#include <HalideBuffer.h>
-#include <HalideRuntimeOpenGL.h>
-
-/*
- * Initializes a halide_buffer_t object for 8-bit RGBA data stored
- * interleaved as rgbargba... in row-major order.
- */
-Halide::Runtime::Buffer<uint8_t> create_buffer(uint8_t *data, int width, int height) {
-    return Halide::Runtime::Buffer<uint8_t>::make_interleaved(data, width, height, 4);
-}
-
-/*
- * Runs the filter on the CPU.  Takes a pointer to memory with the image
- * data to filter, and a pointer to memory in which to place the result
- * data.
- */
-std::string run_cpu_filter(const uint8_t *image_data, uint8_t *result_data, int width, int height) {
-    const auto time = Timer::start("CPU");
-
-    // Create halide input buffer and point it at the passed image data
-    auto input_buf = create_buffer((uint8_t *)image_data, width, height);
-
-    // Create halide output buffer and point it at the passed result data storage
-    auto output_buf = create_buffer(result_data, width, height);
-
-    // Run the AOT-compiled OpenGL filter
-    sample_filter_cpu(input_buf, output_buf);
-
-    return Timer::report(time);
-}
-
-/*
- * Runs the filter on OpenGL.  Takes a pointer to memory with the image
- * data to filter, and a pointer to memory in which to place the result
- * data.
- */
-std::string run_opengl_filter_from_host_to_host(const uint8_t *image_data, uint8_t *result_data, int width, int height) {
-    const auto time = Timer::start("OpenGL host-to-host");
-
-    // Create halide input buffer and point it at the passed image data for
-    // the host memory.  Halide will automatically allocate a texture to
-    // hold the data on the GPU.  Mark the host memory as "dirty" so halide
-    // will know it needs to transfer the data to the GPU texture.
-    auto input_buf = create_buffer((uint8_t *)image_data, width, height);
-    input_buf.set_host_dirty();
-
-    // Create halide output buffer and point it at the passed result data
-    // memory.  Halide will automatically allocate a texture to hold the
-    // data on the GPU.
-    auto output_buf = create_buffer(result_data, width, height);
-
-    // Run the AOT-compiled OpenGL filter
-    sample_filter_opengl(input_buf, output_buf);
-
-    // Ensure that halide copies the data back to the host
-    output_buf.copy_to_host();
-
-    return Timer::report(time);
-}
-
-/*
- * Runs the filter on OpenGL.  Assumes the data is already in a texture,
- * and leaves the output in a texture
- */
-std::string run_opengl_filter_from_texture_to_texture(GLuint input_texture_id, GLuint output_texture_id, int width, int height) {
-    const auto time = Timer::start("OpenGL texture-to-texture");
-
-    // Create halide input buffer and tell it to use the existing GPU
-    // texture.  No need to allocate memory on the host since this simple
-    // pipeline will run entirely on the GPU.
-    auto input_buf = create_buffer(nullptr, width, height);
-    halide_opengl_wrap_texture(nullptr, input_buf.raw_buffer(), input_texture_id);
-
-    // Create halide output buffer and tell it to use the existing GPU texture.
-    // No need to allocate memory on the host since this simple pipeline will run
-    // entirely on the GPU.
-    auto output_buf = create_buffer(nullptr, width, height);
-    halide_opengl_wrap_texture(nullptr, output_buf.raw_buffer(), output_texture_id);
-
-    // Run the AOT-compiled OpenGL filter
-    sample_filter_opengl(input_buf, output_buf);
-
-    // Tell halide we are finished using the textures
-    halide_opengl_detach_texture(nullptr, output_buf.raw_buffer());
-    halide_opengl_detach_texture(nullptr, input_buf.raw_buffer());
-
-    return Timer::report(time);
-}
-
-int main(const int argc, const char *argv[]) {
-    if (argc != 2) {
-        std::cerr << "Usage: " << argv[0] << " filename\n";
-        exit(1);
-    }
-    const std::string filename = argv[1];
-
-    const auto image = PNGHelpers::load(filename);
-    const auto width = image.width;
-    const auto height = image.height;
-
-    const auto layout = Layout::setup(width, height);
-    const auto glfw = GlfwHelpers::setup(layout.window_width, layout.window_height);
-    OpenGLHelpers::setup(glfw.dpi_scale);
-
-    /*
-     * Draw the original image
-     */
-    Layout::draw_image(Layout::UL, image.data, width, height, "Input");
-
-    std::string report;
-
-    /*
-     * Draw the result of running the filter on the CPU
-     */
-    const auto cpu_result_data = (uint8_t *)calloc(width * height * 4, sizeof(uint8_t));
-    report = run_cpu_filter(image.data, cpu_result_data, width, height);
-    Layout::draw_image(Layout::UR, cpu_result_data, width, height, report);
-    free((void *)cpu_result_data);
-
-    /*
-     * Draw the result of running the filter on OpenGL, with data starting
-     * from and ending up on the host
-     */
-    const auto opengl_result_data = (uint8_t *)calloc(width * height * 4, sizeof(uint8_t));
-    report = run_opengl_filter_from_host_to_host(image.data, opengl_result_data, width, height);
-    Layout::draw_image(Layout::LL, opengl_result_data, width, height, report);
-    free((void *)opengl_result_data);
-
-    /*
-     * Draw the result of running the filter on OpenGL, with data starting
-     * from and ending up in a texture on the device
-     */
-    const auto image_texture_id = OpenGLHelpers::create_texture(width, height, image.data);
-    const auto result_texture_id = OpenGLHelpers::create_texture(width, height, nullptr);
-    report = run_opengl_filter_from_texture_to_texture(image_texture_id, result_texture_id, width, height);
-    Layout::draw_texture(Layout::LR, result_texture_id, width, height, report);
-    OpenGLHelpers::delete_texture(image_texture_id);
-    OpenGLHelpers::delete_texture(result_texture_id);
-
-    // Release all Halide internal structures for the OpenGL context
-    halide_opengl_context_lost(nullptr);
-
-    GlfwHelpers::terminate();
-
-    free((void *)image.data);
-
-    return 0;
-}
-
-/*
- * Global definition required by halide with OpenGL backend, to prevent
- * Halide from allocating its own OpenGL context.
- *
- * In general, this function needs to set an active OpenGL context
- * and return 0 on success.
- */
-
-int halide_opengl_create_context(void * /*user_context*/) {
-    GlfwHelpers::set_opengl_context();
-    return 0;
-}
diff --git a/apps/opengl_demo/opengl_helpers.cpp b/apps/opengl_demo/opengl_helpers.cpp
deleted file mode 100644
index 1cf994f19879..000000000000
--- a/apps/opengl_demo/opengl_helpers.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-#include <drawtext.h>
-
-#include "opengl_helpers.h"
-
-using namespace OpenGLHelpers;
-
-static const int font_size = 12;
-
-void OpenGLHelpers::setup(float dpi_scale) {
-    const int scaled_font_size = font_size * dpi_scale;
-    dtx_use_font(dtx_open_font(DTX_FONT, scaled_font_size), scaled_font_size);
-    glClear(GL_COLOR_BUFFER_BIT);
-}
-
-GLuint OpenGLHelpers::create_texture(int width, int height, const uint8_t *data) {
-    GLuint texture_id;
-    glEnable(GL_TEXTURE_2D);
-    glGenTextures(1, &texture_id);
-    glBindTexture(GL_TEXTURE_2D, texture_id);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
-    return texture_id;
-}
-
-void OpenGLHelpers::delete_texture(GLuint texture_id) {
-    glDeleteTextures(1, &texture_id);
-}
-
-void OpenGLHelpers::display_texture(GLuint texture_id, float x0, float x1, float y0, float y1) {
-    glBindTexture(GL_TEXTURE_2D, texture_id);
-    glMatrixMode(GL_PROJECTION);
-    glLoadIdentity();
-    glMatrixMode(GL_MODELVIEW);
-    glLoadIdentity();
-    glMatrixMode(GL_TEXTURE);
-    glLoadIdentity();
-    glColor3f(1, 1, 1);
-    glBegin(GL_QUADS);
-    glTexCoord2d(1, 0);
-    glVertex2f(x1, y1);
-    glTexCoord2d(0, 0);
-    glVertex2f(x0, y1);
-    glTexCoord2d(0, 1);
-    glVertex2f(x0, y0);
-    glTexCoord2d(1, 1);
-    glVertex2f(x1, y0);
-    glEnd();
-    glFinish();
-}
-
-void OpenGLHelpers::draw_text(const std::string &text, float x, float y) {
-    glMatrixMode(GL_PROJECTION);
-    glLoadIdentity();
-    glOrtho(-1, 1, -1, 1, -1, 1);
-    glMatrixMode(GL_MODELVIEW);
-    glLoadIdentity();
-    glTranslatef(x, y, 0);
-    glColor3f(1, 1, 1);
-    GLint viewport[4];
-    glGetIntegerv(GL_VIEWPORT, viewport);
-    glScalef(2.0f / viewport[2], 2.0f / viewport[3], 1);
-    dtx_string(text.c_str());
-    glFinish();
-}
diff --git a/apps/opengl_demo/opengl_helpers.h b/apps/opengl_demo/opengl_helpers.h
deleted file mode 100644
index 962f61989928..000000000000
--- a/apps/opengl_demo/opengl_helpers.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _OPENGL_HELPERS_H_
-#define _OPENGL_HELPERS_H_
-
-#include <string>
-
-#if defined(__APPLE__)
-#include <OpenGL/gl.h>
-#else
-#include <GL/gl.h>
-#endif
-
-namespace OpenGLHelpers {
-void setup(float dpi_scale);
-GLuint create_texture(int width, int height, const uint8_t *data);
-void delete_texture(GLuint texture_id);
-void display_texture(GLuint texture_id, float x0, float x1, float y0, float y1);
-void draw_text(const std::string &text, float x, float y);
-}  // namespace OpenGLHelpers
-
-#endif
diff --git a/apps/opengl_demo/png_helpers.cpp b/apps/opengl_demo/png_helpers.cpp
deleted file mode 100644
index c7d48f00949f..000000000000
--- a/apps/opengl_demo/png_helpers.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-#include <cstdio>
-#include <iostream>
-#include <png.h>
-
-#include "png_helpers.h"
-
-using namespace PNGHelpers;
-
-struct image_info PNGHelpers::load(const std::string &filepath) {
-    const auto fp = fopen(filepath.c_str(), "rb");
-    if (fp == 0) {
-        perror(filepath.c_str());
-        exit(1);
-    }
-
-    // verify the header
-    png_byte header[8];
-    fread(header, 1, 8, fp);
-    if (png_sig_cmp(header, 0, 8)) {
-        std::cerr << "error: " << filepath << " is not a PNG file.\n";
-        exit(1);
-    }
-
-    auto png = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
-    auto png_info = png_create_info_struct(png);
-
-    if (setjmp(png_jmpbuf(png))) abort();
-
-    png_init_io(png, fp);
-    png_set_sig_bytes(png, 8);  // already read header
-    png_read_info(png, png_info);
-
-    const auto width = png_get_image_width(png, png_info);
-    const auto height = png_get_image_height(png, png_info);
-    const auto color_type = png_get_color_type(png, png_info);
-    const auto bit_depth = png_get_bit_depth(png, png_info);
-
-    if (bit_depth == 16)
-        png_set_strip_16(png);
-
-    if (color_type == PNG_COLOR_TYPE_PALETTE)
-        png_set_palette_to_rgb(png);
-
-    if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8)
-        png_set_expand_gray_1_2_4_to_8(png);
-
-    if (png_get_valid(png, png_info, PNG_INFO_tRNS))
-        png_set_tRNS_to_alpha(png);
-
-    if (color_type == PNG_COLOR_TYPE_RGB || color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_PALETTE)
-        png_set_filler(png, 0xFF, PNG_FILLER_AFTER);
-
-    if (color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
-        png_set_gray_to_rgb(png);
-
-    png_read_update_info(png, png_info);
-
-    const auto rowbytes = png_get_rowbytes(png, png_info);
-    const auto image_data = (png_byte *)malloc(rowbytes * height * sizeof(png_byte));
-
-    const auto row_pointers = (png_byte **)malloc(height * sizeof(png_byte *));
-    for (int i = 0; i < height; i++) {
-        row_pointers[i] = image_data + i * rowbytes;
-    }
-
-    png_read_image(png, row_pointers);
-
-    png_destroy_read_struct(&png, &png_info, nullptr);
-    free(row_pointers);
-    fclose(fp);
-
-    return {width, height, image_data};
-}
diff --git a/apps/opengl_demo/png_helpers.h b/apps/opengl_demo/png_helpers.h
deleted file mode 100644
index da1791526e00..000000000000
--- a/apps/opengl_demo/png_helpers.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _PNG_HELPERS_
-#define _PNG_HELPERS_
-
-namespace PNGHelpers {
-
-struct image_info {
-    unsigned int width;
-    unsigned int height;
-    const uint8_t *data;
-};
-
-struct image_info load(const std::string &filepath);
-}  // namespace PNGHelpers
-
-#endif
diff --git a/apps/opengl_demo/sample_filter_generator.cpp b/apps/opengl_demo/sample_filter_generator.cpp
deleted file mode 100644
index 4bf30eaf641b..000000000000
--- a/apps/opengl_demo/sample_filter_generator.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-#include "Halide.h"
-
-class SampleFilter : public Halide::Generator<SampleFilter> {
-public:
-    Input<Buffer<uint8_t>> input{"input", 3};
-    Output<Buffer<uint8_t>> output{"output", 3};
-
-    void generate() {
-        Var x, y, c;
-
-        output(x, y, c) = select(c == 3, input(x, y, c), cast<uint8_t>(255.0f - input(x, y, c)));
-
-        input.dim(0).set_stride(4).dim(2).set_stride(1).set_bounds(0, 4);
-
-        output.dim(0).set_stride(4).dim(2).set_stride(1);
-        output.bound(c, 0, 4);
-
-        if (get_target().has_feature(Target::OpenGL)) {
-            output.glsl(x, y, c);
-        }
-    }
-};
-
-HALIDE_REGISTER_GENERATOR(SampleFilter, sample_filter)
diff --git a/apps/opengl_demo/timer.cpp b/apps/opengl_demo/timer.cpp
deleted file mode 100644
index 2cd243a323ab..000000000000
--- a/apps/opengl_demo/timer.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-#include <iostream>
-#include <sstream>
-
-#include "timer.h"
-
-using namespace Timer;
-
-struct info Timer::start(const std::string &what) {
-    struct info info {
-        what
-    };
-    std::cerr << "\n-------------- Starting " << info.what << "\n";
-    info.time = std::chrono::high_resolution_clock::now();
-    return info;
-}
-
-std::string Timer::report(const struct info &info) {
-    const auto end_time = std::chrono::high_resolution_clock::now();
-    const auto ms = std::chrono::duration<double, std::milli>(end_time - info.time).count();
-    std::stringstream report;
-    report << info.what << ": " << ms << "ms";
-    std::cerr << "-------------- Finished " << report.str() << "\n";
-    return report.str();
-}
diff --git a/apps/opengl_demo/timer.h b/apps/opengl_demo/timer.h
deleted file mode 100644
index 596e5c78fe55..000000000000
--- a/apps/opengl_demo/timer.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _TIMER_H_
-#define _TIMER_H_
-
-#include <chrono>
-
-namespace Timer {
-struct info {
-    const std::string what;
-    std::chrono::time_point<std::chrono::high_resolution_clock> time;
-};
-
-struct info start(const std::string &what);
-std::string report(const struct info &);
-}  // namespace Timer
-
-#endif
diff --git a/apps/resnet_50/Makefile b/apps/resnet_50/Makefile
index 188620382940..3d1dd30c9ce8 100644
--- a/apps/resnet_50/Makefile
+++ b/apps/resnet_50/Makefile
@@ -13,7 +13,7 @@ $(BIN)/%/pytorch_weights/ok:
 
 $(GENERATOR_BIN)/resnet50.generator: Resnet50Generator.cpp $(GENERATOR_DEPS)
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -g -fno-rtti $(filter %.cpp,$^) -o $@ $(LIBHALIDE_LDFLAGS) 
+	$(CXX) $(CXXFLAGS) -g -fno-rtti $(filter %.cpp,$^) -o $@ $(LIBHALIDE_LDFLAGS)
 
 $(BIN)/%/resnet50.a: $(GENERATOR_BIN)/resnet50.generator
 	@mkdir -p $(@D)
@@ -21,7 +21,7 @@ $(BIN)/%/resnet50.a: $(GENERATOR_BIN)/resnet50.generator
 
 $(BIN)/%/process: process.cpp $(BIN)/%/resnet50.a
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)
+	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS)
 
 benchmark_and_validate: $(BIN)/$(HL_TARGET)/process $(BIN)/$(HL_TARGET)/pytorch_weights/ok
 	$< 10 $* $(BIN)/$(HL_TARGET)/pytorch_weights/ $(SEED) $(BIN)/$(HL_TARGET)/res50gen_output.bin
diff --git a/apps/stencil_chain/Makefile b/apps/stencil_chain/Makefile
index 91750f988869..116922d03095 100644
--- a/apps/stencil_chain/Makefile
+++ b/apps/stencil_chain/Makefile
@@ -6,7 +6,7 @@ build: $(BIN)/$(HL_TARGET)/process
 
 $(GENERATOR_BIN)/stencil_chain.generator: stencil_chain_generator.cpp $(GENERATOR_DEPS)
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) $(filter %.cpp,$^) -o $@ $(LIBHALIDE_LDFLAGS) 
+	$(CXX) $(CXXFLAGS) $(filter %.cpp,$^) -o $@ $(LIBHALIDE_LDFLAGS)
 
 $(BIN)/%/stencil_chain.a: $(GENERATOR_BIN)/stencil_chain.generator
 	@mkdir -p $(@D)
@@ -18,7 +18,7 @@ $(BIN)/%/stencil_chain_auto_schedule.a: $(GENERATOR_BIN)/stencil_chain.generator
 
 $(BIN)/%/process: process.cpp $(BIN)/%/stencil_chain.a $(BIN)/%/stencil_chain_auto_schedule.a
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)
+	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS)
 
 $(BIN)/%/out.png: $(BIN)/%/process
 	@mkdir -p $(@D)
diff --git a/apps/support/Makefile.inc b/apps/support/Makefile.inc
index c72bf1c36735..8f647457dd9f 100644
--- a/apps/support/Makefile.inc
+++ b/apps/support/Makefile.inc
@@ -98,7 +98,6 @@ ANDROID_API_VERSION ?= 26
 
 CXX-host ?= $(CXX)
 CXX-host-opencl ?= $(CXX)
-CXX-host-opengl ?= $(CXX)
 CXX-host-cuda ?= $(CXX)
 CXX-host-metal ?= $(CXX)
 CXX-host-hvx_128 ?= $(CXX)
@@ -111,7 +110,6 @@ CXX-arm-32-profile-android ?= $(CXX-arm-32-android)
 
 CXXFLAGS-host ?= $(CXXFLAGS)
 CXXFLAGS-host-opencl ?= $(CXXFLAGS)
-CXXFLAGS-host-opengl ?= $(CXXFLAGS)
 CXXFLAGS-host-cuda ?= $(CXXFLAGS)
 CXXFLAGS-host-metal ?= $(CXXFLAGS)
 CXXFLAGS-host-hvx_128 ?= $(CXXFLAGS)
@@ -121,7 +119,6 @@ CXXFLAGS-arm-32-android ?= $(CXXFLAGS)
 
 LDFLAGS-host ?= $(LDFLAGS)
 LDFLAGS-host-opencl ?= $(LDFLAGS)
-LDFLAGS-host-opengl ?= $(LDFLAGS)
 LDFLAGS-host-cuda ?= $(LDFLAGS)
 LDFLAGS-host-metal ?= $(LDFLAGS)
 LDFLAGS-host-hvx_128 ?= $(LDFLAGS)
@@ -185,15 +182,6 @@ IMAGE_IO_CXX_FLAGS = $(LIBPNG_CXX_FLAGS) $(LIBJPEG_CXX_FLAGS)
 
 IMAGE_IO_FLAGS = $(IMAGE_IO_LIBS) $(IMAGE_IO_CXX_FLAGS)
 
-PLATFORM_OPENGL_LDFLAGS=-lGL -lX11
-ifeq ($(UNAME), Darwin)
-PLATFORM_OPENGL_LDFLAGS=-framework OpenGL
-endif
-
-ifneq (, $(findstring opengl,$(HL_TARGET)))
-  OPENGL_LDFLAGS=$(PLATFORM_OPENGL_LDFLAGS)
-endif
-
 ifneq (, $(findstring metal,$(HL_TARGET)))
   LDFLAGS += -framework Metal -framework Foundation
 endif
diff --git a/apps/unsharp/Makefile b/apps/unsharp/Makefile
index 1accb3c498ea..fa912ad172e1 100644
--- a/apps/unsharp/Makefile
+++ b/apps/unsharp/Makefile
@@ -22,7 +22,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/unsharp.generator
 
 $(BIN)/%/filter: filter.cpp $(BIN)/%/unsharp.a $(BIN)/%/unsharp_auto_schedule.a $(BIN)/%/runtime.a
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS)
+	$(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS)
 
 $(BIN)/%/out.png: $(BIN)/%/filter
 	$< ../images/rgba.png $(BIN)/$*/out.png
diff --git a/cmake/HalideGeneratorHelpers.cmake b/cmake/HalideGeneratorHelpers.cmake
index 220f1f56ceb8..d48e02778970 100644
--- a/cmake/HalideGeneratorHelpers.cmake
+++ b/cmake/HalideGeneratorHelpers.cmake
@@ -342,7 +342,8 @@ function(_Halide_add_targets_to_runtime TARGET)
 endfunction()
 
 function(_Halide_target_link_gpu_libs TARGET VISIBILITY)
-    if ("${ARGN}" MATCHES "opengl")
+    # TODO(https://github.com/halide/Halide/issues/5633): verify that this is correct & necessary for OpenGLCompute
+    if ("${ARGN}" MATCHES "openglcompute")
         if ("${ARGN}" MATCHES "egl")
             find_package(OpenGL REQUIRED COMPONENTS OpenGL EGL)
             target_link_libraries(${TARGET} ${VISIBILITY} OpenGL::OpenGL OpenGL::EGL)
diff --git a/dependencies/CMakeLists.txt b/dependencies/CMakeLists.txt
index 3fc8ba95ce6e..002afd0bcd7d 100644
--- a/dependencies/CMakeLists.txt
+++ b/dependencies/CMakeLists.txt
@@ -6,6 +6,7 @@ set(THREADS_PREFER_PTHREAD_FLAG YES)
 find_package(Threads REQUIRED)
 set_target_properties(Threads::Threads PROPERTIES IMPORTED_GLOBAL TRUE)
 
+# TODO(https://github.com/halide/Halide/issues/5633): verify this is still correct / necessary for OpenGLCompute
 find_package(OpenGL)
 if (TARGET OpenGL::GL)
     set_target_properties(OpenGL::GL PROPERTIES IMPORTED_GLOBAL TRUE)
@@ -18,7 +19,7 @@ endif ()
 
 ##
 # Third-party dependencies in their own subdirectories
-## 
+##
 
 add_subdirectory(llvm)
 
diff --git a/python_bindings/correctness/target.py b/python_bindings/correctness/target.py
index b54fb2984969..3f8e8347b23b 100644
--- a/python_bindings/correctness/target.py
+++ b/python_bindings/correctness/target.py
@@ -46,10 +46,10 @@ def test_target():
     # Full specification round-trip, crazy features
     t1 = hl.Target(hl.TargetOS.Android, hl.TargetArch.ARM, 32,
                    [hl.TargetFeature.JIT, hl.TargetFeature.SSE41, hl.TargetFeature.AVX, hl.TargetFeature.AVX2,
-                    hl.TargetFeature.CUDA, hl.TargetFeature.OpenCL, hl.TargetFeature.OpenGL, hl.TargetFeature.OpenGLCompute,
+                    hl.TargetFeature.CUDA, hl.TargetFeature.OpenCL, hl.TargetFeature.OpenGLCompute,
                     hl.TargetFeature.Debug])
     ts = t1.to_string()
-    assert ts == "arm-32-android-avx-avx2-cuda-debug-jit-opencl-opengl-openglcompute-sse41"
+    assert ts == "arm-32-android-avx-avx2-cuda-debug-jit-opencl-openglcompute-sse41"
     assert hl.Target.validate_target_string(ts)
 
     # Expected failures:
diff --git a/python_bindings/src/PyEnums.cpp b/python_bindings/src/PyEnums.cpp
index c64352f73101..b47cd3e761a9 100644
--- a/python_bindings/src/PyEnums.cpp
+++ b/python_bindings/src/PyEnums.cpp
@@ -15,7 +15,6 @@ void define_enums(py::module &m) {
         .value("Default_GPU", DeviceAPI::Default_GPU)
         .value("CUDA", DeviceAPI::CUDA)
         .value("OpenCL", DeviceAPI::OpenCL)
-        .value("GLSL", DeviceAPI::GLSL)
         .value("OpenGLCompute", DeviceAPI::OpenGLCompute)
         .value("Metal", DeviceAPI::Metal)
         .value("Hexagon", DeviceAPI::Hexagon);
@@ -106,7 +105,6 @@ void define_enums(py::module &m) {
         .value("CLDoubles", Target::Feature::CLDoubles)
         .value("CLHalf", Target::Feature::CLHalf)
         .value("CLAtomics64", Target::Feature::CLAtomics64)
-        .value("OpenGL", Target::Feature::OpenGL)
         .value("OpenGLCompute", Target::Feature::OpenGLCompute)
         .value("EGL", Target::Feature::EGL)
         .value("UserContext", Target::Feature::UserContext)
diff --git a/python_bindings/src/PyFunc.cpp b/python_bindings/src/PyFunc.cpp
index 4cf8eb6736e2..2bb24193a962 100644
--- a/python_bindings/src/PyFunc.cpp
+++ b/python_bindings/src/PyFunc.cpp
@@ -342,10 +342,6 @@ void define_func(py::module &m) {
 
             .def("bound_extent", &Func::bound_extent, py::arg("var"), py::arg("extent"))
 
-            .def("shader", &Func::shader, py::arg("x"), py::arg("y"), py::arg("c"), py::arg("device_api"))
-
-            .def("glsl", &Func::glsl, py::arg("x"), py::arg("y"), py::arg("c"))
-
             .def("align_storage", &Func::align_storage, py::arg("dim"), py::arg("alignment"))
 
             .def("fold_storage", &Func::fold_storage, py::arg("dim"), py::arg("extent"), py::arg("fold_forward") = true)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 29458c7db0d9..0b45adf43715 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -81,7 +81,6 @@ set(HEADER_FILES
     ImageParam.h
     InferArguments.h
     InjectHostDevBufferCopies.h
-    InjectOpenGLIntrinsics.h
     Inline.h
     InlineReductions.h
     IntegerDivisionTable.h
@@ -163,7 +162,6 @@ set(HEADER_FILES
     UnsafePromises.h
     Util.h
     Var.h
-    VaryingAttributes.h
     VectorizeLoops.h
     WasmExecutor.h
     WrapCalls.h
@@ -241,7 +239,6 @@ set(SOURCE_FILES
     ImageParam.cpp
     InferArguments.cpp
     InjectHostDevBufferCopies.cpp
-    InjectOpenGLIntrinsics.cpp
     Inline.cpp
     InlineReductions.cpp
     IntegerDivisionTable.cpp
@@ -335,7 +332,6 @@ set(SOURCE_FILES
     UnsafePromises.cpp
     Util.cpp
     Var.cpp
-    VaryingAttributes.cpp
     VectorizeLoops.cpp
     WasmExecutor.cpp
     WrapCalls.cpp
@@ -454,11 +450,6 @@ if (TARGET_OPENCL)
     target_compile_definitions(Halide PRIVATE WITH_OPENCL)
 endif ()
 
-option(TARGET_OPENGL "Include OpenGL/GLSL target" ON)
-if (TARGET_OPENGL)
-    target_compile_definitions(Halide PRIVATE WITH_OPENGL)
-endif ()
-
 option(TARGET_METAL "Include Metal target" ON)
 if (TARGET_METAL)
     target_compile_definitions(Halide PRIVATE WITH_METAL)
diff --git a/src/CodeGen_C.cpp b/src/CodeGen_C.cpp
index 9aac7010eee3..72cc3ef29969 100644
--- a/src/CodeGen_C.cpp
+++ b/src/CodeGen_C.cpp
@@ -29,7 +29,6 @@ extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeHexagonHost
 extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeMetal_h[];
 extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeOpenCL_h[];
 extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeOpenGLCompute_h[];
-extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeOpenGL_h[];
 extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeQurt_h[];
 extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeD3D12Compute_h[];
 
@@ -418,9 +417,6 @@ CodeGen_C::~CodeGen_C() {
             if (target.has_feature(Target::OpenGLCompute)) {
                 stream << halide_internal_runtime_header_HalideRuntimeOpenGLCompute_h << "\n";
             }
-            if (target.has_feature(Target::OpenGL)) {
-                stream << halide_internal_runtime_header_HalideRuntimeOpenGL_h << "\n";
-            }
             if (target.has_feature(Target::D3D12Compute)) {
                 stream << halide_internal_runtime_header_HalideRuntimeD3D12Compute_h << "\n";
             }
diff --git a/src/CodeGen_GPU_Host.cpp b/src/CodeGen_GPU_Host.cpp
index 488a8703bfdc..7f3d3a46fb68 100644
--- a/src/CodeGen_GPU_Host.cpp
+++ b/src/CodeGen_GPU_Host.cpp
@@ -22,7 +22,6 @@
 #include "LLVM_Headers.h"
 #include "Simplify.h"
 #include "Util.h"
-#include "VaryingAttributes.h"
 
 namespace Halide {
 namespace Internal {
@@ -104,13 +103,9 @@ template<typename CodeGen_CPU>
 CodeGen_GPU_Host<CodeGen_CPU>::CodeGen_GPU_Host(const Target &target)
     : CodeGen_CPU(target) {
     // For the default GPU, the order of preferences is: Metal,
-    // OpenCL, CUDA, OpenGLCompute, and OpenGL last.
+    // OpenCL, CUDA, OpenGLCompute last.
     // The code is in reverse order to allow later tests to override
     // earlier ones.
-    if (target.has_feature(Target::OpenGL)) {
-        debug(1) << "Constructing OpenGL device codegen\n";
-        cgdev[DeviceAPI::GLSL] = std::make_unique<CodeGen_OpenGL_Dev>(target);
-    }
     if (target.has_feature(Target::OpenGLCompute)) {
         debug(1) << "Constructing OpenGL Compute device codegen\n";
         cgdev[DeviceAPI::OpenGLCompute] = new_CodeGen_OpenGLCompute_Dev(target);
@@ -255,77 +250,31 @@ void CodeGen_GPU_Host<CodeGen_CPU>::visit(const For *loop) {
         Value *gpu_num_coords_dim0 = zero_int32;
         Value *gpu_num_coords_dim1 = zero_int32;
 
-        if (loop->device_api == DeviceAPI::GLSL) {
-
-            // GL draw calls that invoke the GLSL shader are issued for pairs of
-            // for-loops over spatial x and y dimensions. For each for-loop we create
-            // one scalar vertex attribute for the spatial dimension corresponding to
-            // that loop, plus one scalar attribute for each expression previously
-            // labeled as "glsl_varying"
-
-            // Pass variables created during setup_gpu_vertex_buffer to the
-            // dev run function call.
-            gpu_num_padded_attributes = codegen(Variable::make(Int(32), "glsl.num_padded_attributes"));
-            gpu_num_coords_dim0 = codegen(Variable::make(Int(32), "glsl.num_coords_dim0"));
-            gpu_num_coords_dim1 = codegen(Variable::make(Int(32), "glsl.num_coords_dim1"));
-
-            // Look up the allocation for the vertex buffer and cast it to the
-            // right type
-            gpu_vertex_buffer = codegen(Variable::make(type_of<float *>(), "glsl.vertex_buffer"));
-            gpu_vertex_buffer = builder->CreatePointerCast(gpu_vertex_buffer,
-                                                           CodeGen_LLVM::f32_t->getPointerTo());
-        }
-
         // compute a closure over the state passed into the kernel
         HostClosure c(loop->body, loop->name);
 
         // Determine the arguments that must be passed into the halide function
         vector<DeviceArgument> closure_args = c.arguments();
 
-        // Halide allows passing of scalar float and integer arguments. For
-        // OpenGL, pack these into vec4 uniforms and varying attributes
-        if (loop->device_api == DeviceAPI::GLSL) {
-
-            int num_uniform_floats = 0;
-
-            // The spatial x and y coordinates are passed in the first two
-            // scalar float varying slots
-            int num_varying_floats = 2;
-            int num_uniform_ints = 0;
-
-            // Pack scalar parameters into vec4
-            for (size_t i = 0; i < closure_args.size(); i++) {
-                if (closure_args[i].is_buffer) {
-                    continue;
-                } else if (ends_with(closure_args[i].name, ".varying")) {
-                    closure_args[i].packed_index = num_varying_floats++;
-                } else if (closure_args[i].type.is_float()) {
-                    closure_args[i].packed_index = num_uniform_floats++;
-                } else if (closure_args[i].type.is_int()) {
-                    closure_args[i].packed_index = num_uniform_ints++;
-                }
-            }
-        } else {
-            // Sort the args by the size of the underlying type. This is
-            // helpful for avoiding struct-packing ambiguities in metal,
-            // which passes the scalar args as a struct.
-            std::sort(closure_args.begin(), closure_args.end(),
-                      [](const DeviceArgument &a, const DeviceArgument &b) {
-                          if (a.is_buffer == b.is_buffer) {
-                              return a.type.bits() > b.type.bits();
-                          } else {
-                              // Ensure that buffer arguments come first:
-                              // for many OpenGL/Compute systems, the
-                              // legal indices for buffer args are much
-                              // more restrictive than for scalar args,
-                              // and scalar args can be 'grown' by
-                              // LICM. Putting buffers first makes it much
-                              // more likely we won't fail on some
-                              // hardware.
-                              return a.is_buffer > b.is_buffer;
-                          }
-                      });
-        }
+        // Sort the args by the size of the underlying type. This is
+        // helpful for avoiding struct-packing ambiguities in metal,
+        // which passes the scalar args as a struct.
+        std::sort(closure_args.begin(), closure_args.end(),
+                  [](const DeviceArgument &a, const DeviceArgument &b) {
+                      if (a.is_buffer == b.is_buffer) {
+                          return a.type.bits() > b.type.bits();
+                      } else {
+                          // Ensure that buffer arguments come first:
+                          // for many OpenGL/Compute systems, the
+                          // legal indices for buffer args are much
+                          // more restrictive than for scalar args,
+                          // and scalar args can be 'grown' by
+                          // LICM. Putting buffers first makes it much
+                          // more likely we won't fail on some
+                          // hardware.
+                          return a.is_buffer > b.is_buffer;
+                      }
+                  });
 
         for (size_t i = 0; i < closure_args.size(); i++) {
             if (closure_args[i].is_buffer && allocations.contains(closure_args[i].name)) {
diff --git a/src/CodeGen_Internal.cpp b/src/CodeGen_Internal.cpp
index f118ec9b9f2e..86a1baf60bb0 100644
--- a/src/CodeGen_Internal.cpp
+++ b/src/CodeGen_Internal.cpp
@@ -222,7 +222,6 @@ bool function_takes_user_context(const std::string &name) {
         "halide_memoization_cache_release",
         "halide_cuda_run",
         "halide_opencl_run",
-        "halide_opengl_run",
         "halide_openglcompute_run",
         "halide_metal_run",
         "halide_d3d12compute_run",
@@ -246,7 +245,6 @@ bool function_takes_user_context(const std::string &name) {
         "halide_vtcm_free",
         "halide_cuda_initialize_kernels",
         "halide_opencl_initialize_kernels",
-        "halide_opengl_initialize_kernels",
         "halide_openglcompute_initialize_kernels",
         "halide_metal_initialize_kernels",
         "halide_d3d12compute_initialize_kernels",
diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp
index ae1bf51bf4fd..e5d86abc753a 100644
--- a/src/CodeGen_LLVM.cpp
+++ b/src/CodeGen_LLVM.cpp
@@ -240,7 +240,6 @@ std::unique_ptr<CodeGen_LLVM> CodeGen_LLVM::new_for_target(const Target &target,
     // The awkward mapping from targets to code generators
     if (target.features_any_of({Target::CUDA,
                                 Target::OpenCL,
-                                Target::OpenGL,
                                 Target::OpenGLCompute,
                                 Target::Metal,
                                 Target::D3D12Compute})) {
diff --git a/src/CodeGen_OpenGLCompute_Dev.cpp b/src/CodeGen_OpenGLCompute_Dev.cpp
index 1bc8c7d46cdb..1be9045bb1d1 100644
--- a/src/CodeGen_OpenGLCompute_Dev.cpp
+++ b/src/CodeGen_OpenGLCompute_Dev.cpp
@@ -7,7 +7,6 @@
 #include "IRMutator.h"
 #include "IROperator.h"
 #include "Simplify.h"
-#include "VaryingAttributes.h"
 #include <iomanip>
 #include <limits>
 #include <map>
diff --git a/src/CodeGen_OpenGL_Dev.cpp b/src/CodeGen_OpenGL_Dev.cpp
index 333d837eb64b..2081fd37b75c 100644
--- a/src/CodeGen_OpenGL_Dev.cpp
+++ b/src/CodeGen_OpenGL_Dev.cpp
@@ -6,7 +6,6 @@
 #include "IRMutator.h"
 #include "IROperator.h"
 #include "Simplify.h"
-#include "VaryingAttributes.h"
 #include <iomanip>
 #include <limits>
 #include <map>
@@ -759,127 +758,6 @@ void CodeGen_GLSL::visit(const Evaluate *op) {
     print_expr(op->value);
 }
 
-void CodeGen_GLSL::visit(const Call *op) {
-    ostringstream rhs;
-    if (op->is_intrinsic(Call::glsl_texture_load)) {
-        // This intrinsic takes five arguments
-        // glsl_texture_load(<tex name>, <buffer>, <x>, <y>, <c>)
-        internal_assert(op->args.size() == 5);
-
-        // The argument to the call is either a StringImm or a broadcasted
-        // StringImm if this is part of a vectorized expression
-        internal_assert(op->args[0].as<StringImm>() ||
-                        (op->args[0].as<Broadcast>() && op->args[0].as<Broadcast>()->value.as<StringImm>()));
-
-        const StringImm *string_imm = op->args[0].as<StringImm>();
-        if (!string_imm) {
-            string_imm = op->args[0].as<Broadcast>()->value.as<StringImm>();
-        }
-
-        // Determine the halide buffer associated with this load
-        string buffername = string_imm->value;
-
-        internal_assert((op->type.code() == Type::UInt || op->type.code() == Type::Float) &&
-                        (op->type.lanes() >= 1 && op->type.lanes() <= 4));
-
-        if (op->type.is_uint()) {
-            rhs << print_type(op->type) << "(floor(";
-        }
-
-        if (op->type.is_vector()) {
-            // The channel argument must be a ramp or a broadcast of a constant.
-            Expr c = op->args[4];
-            internal_assert(is_const(c));
-
-            const Ramp *rc = c.as<Ramp>();
-            const Broadcast *bx = op->args[2].as<Broadcast>();
-            const Broadcast *by = op->args[3].as<Broadcast>();
-            if (rc && is_const_zero(rc->base) && is_const_one(rc->stride) && bx && by) {
-                // If the x and y coordinates are broadcasts, and the c
-                // coordinate is a dense ramp, we can do a single
-                // texture2D call.
-                rhs << "texture2D(" << print_name(buffername) << ", vec2("
-                    << print_expr(bx->value) << ", "
-                    << print_expr(by->value) << "))";
-
-                // texture2D always returns a vec4. Swizzle out the lanes we want.
-                switch (op->type.lanes()) {
-                case 1:
-                    rhs << ".r";
-                    break;
-                case 2:
-                    rhs << ".rg";
-                    break;
-                case 3:
-                    rhs << ".rgb";
-                    break;
-                default:
-                    break;
-                }
-            } else {
-                // Otherwise do one load per lane and make a vector
-                vector<string> xs = print_lanes(op->args[2]);
-                vector<string> ys = print_lanes(op->args[3]);
-                vector<string> cs = print_lanes(op->args[4]);
-                string name = print_name(buffername);
-
-                string x = print_expr(op->args[2]), y = print_expr(op->args[3]);
-                rhs << print_type(op->type) << "(";
-                for (int i = 0; i < op->type.lanes(); i++) {
-                    if (i > 0) {
-                        rhs << ", ";
-                    }
-                    rhs << "texture2D(" << name << ", vec2("
-                        << xs[i] << ", " << ys[i] << "))[" << cs[i] << "]";
-                }
-                rhs << ")";
-            }
-        } else if (const int64_t *ic = as_const_int(op->args[4])) {
-            internal_assert(*ic >= 0 && *ic < 4);
-            rhs << "texture2D(" << print_name(buffername) << ", vec2("
-                << print_expr(op->args[2]) << ", "
-                << print_expr(op->args[3]) << "))."
-                << get_lane_suffix(*ic);
-        } else {
-            rhs << "texture2D(" << print_name(buffername) << ", vec2("
-                << print_expr(op->args[2]) << ", "
-                << print_expr(op->args[3]) << "))["
-                << print_expr(op->args[4]) << "]";
-        }
-
-        if (op->type.is_uint()) {
-            rhs << " * " << print_expr(cast<float>(op->type.max())) << " + 0.5))";
-        }
-
-    } else if (op->is_intrinsic(Call::glsl_texture_store)) {
-        internal_assert(op->args.size() == 6);
-        string sval = print_expr(op->args[5]);
-        string suffix = get_vector_suffix(op->args[4]);
-        stream << get_indent() << "gl_FragColor" << suffix
-               << " = " << sval;
-        if (op->args[5].type().is_uint()) {
-            stream << " / " << print_expr(cast<float>(op->args[5].type().max()));
-        }
-        stream << ";\n";
-        // glsl_texture_store is called only for its side effect; there is
-        // no return value.
-        id = "";
-        return;
-    } else if (op->is_intrinsic(Call::glsl_varying)) {
-        // Varying attributes should be substituted out by this point in
-        // codegen.
-        debug(2) << "Found skipped varying attribute: " << op->args[0] << "\n";
-
-        // Output the tagged expression.
-        print_expr(op->args[1]);
-        return;
-    } else {
-        CodeGen_GLSLBase::visit(op);
-        return;
-    }
-    print_assignment(op->type, rhs.str());
-}
-
 namespace {
 class AllAccessConstant : public IRVisitor {
     using IRVisitor::visit;
@@ -1256,16 +1134,6 @@ void CodeGen_GLSL::test() {
                        Broadcast::make(2.f, 4)),
           "vec4 $ = vec4(2.0, 1.0, 2.0, 2.0);\n");
 
-    // Test codegen for texture loads
-    Expr load4 = Call::make(Float(32, 4), Call::glsl_texture_load,
-                            {string("buf"),
-                             0,
-                             Broadcast::make(0, 4),
-                             Broadcast::make(0, 4),
-                             Ramp::make(0, 1, 4)},
-                            Call::Intrinsic);
-    check(load4, "vec4 $ = texture2D($buf, vec2(int(0), int(0)));\n");
-
     check(log(1.0f), "float $ = log(1.0);\n");
     check(exp(1.0f), "float $ = exp(1.0);\n");
 
diff --git a/src/CodeGen_OpenGL_Dev.h b/src/CodeGen_OpenGL_Dev.h
index 03cf43e1a1c8..b180b5e0ef12 100644
--- a/src/CodeGen_OpenGL_Dev.h
+++ b/src/CodeGen_OpenGL_Dev.h
@@ -134,7 +134,6 @@ class CodeGen_GLSL : public CodeGen_GLSLBase {
     void visit(const Allocate *) override;
     void visit(const Free *) override;
 
-    void visit(const Call *) override;
     void visit(const AssertStmt *) override;
     void visit(const Ramp *op) override;
     void visit(const Broadcast *) override;
diff --git a/src/Deinterleave.cpp b/src/Deinterleave.cpp
index f5bd78b41d97..9ca31a012ee2 100644
--- a/src/Deinterleave.cpp
+++ b/src/Deinterleave.cpp
@@ -320,14 +320,6 @@ class Deinterleaver : public IRGraphMutator {
         // Don't mutate scalars
         if (op->type.is_scalar()) {
             return op;
-        } else if (op->is_intrinsic(Call::glsl_texture_load)) {
-            // glsl_texture_load returns a <uint x 4> result. Deinterleave by
-            // wrapping the call in a shuffle_vector
-            std::vector<int> indices;
-            for (int i = 0; i < new_lanes; i++) {
-                indices.push_back(i * lane_stride + starting_lane);
-            }
-            return Shuffle::make({op}, indices);
         } else {
 
             // Vector calls are always parallel across the lanes, so we
diff --git a/src/DeviceAPI.h b/src/DeviceAPI.h
index ab132e091f8d..e75711592558 100644
--- a/src/DeviceAPI.h
+++ b/src/DeviceAPI.h
@@ -18,7 +18,6 @@ enum class DeviceAPI {
     Default_GPU,
     CUDA,
     OpenCL,
-    GLSL,
     OpenGLCompute,
     Metal,
     Hexagon,
@@ -33,7 +32,6 @@ const DeviceAPI all_device_apis[] = {DeviceAPI::None,
                                      DeviceAPI::Default_GPU,
                                      DeviceAPI::CUDA,
                                      DeviceAPI::OpenCL,
-                                     DeviceAPI::GLSL,
                                      DeviceAPI::OpenGLCompute,
                                      DeviceAPI::Metal,
                                      DeviceAPI::Hexagon,
diff --git a/src/DeviceArgument.cpp b/src/DeviceArgument.cpp
index 5746958235b4..77f81c5001ff 100644
--- a/src/DeviceArgument.cpp
+++ b/src/DeviceArgument.cpp
@@ -40,9 +40,7 @@ std::vector<DeviceArgument> HostClosure::arguments() {
 }
 
 void HostClosure::visit(const Call *op) {
-    if (op->is_intrinsic(Call::glsl_texture_load) ||
-        op->is_intrinsic(Call::image_load) ||
-        op->is_intrinsic(Call::glsl_texture_store) ||
+    if (op->is_intrinsic(Call::image_load) ||
         op->is_intrinsic(Call::image_store)) {
 
         // The argument to the call is either a StringImm or a broadcasted
@@ -64,12 +62,10 @@ void HostClosure::visit(const Call *op) {
                               MemoryType::GPUTexture :
                               MemoryType::Auto;
 
-        if (op->is_intrinsic(Call::glsl_texture_load) ||
-            op->is_intrinsic(Call::image_load)) {
+        if (op->is_intrinsic(Call::image_load)) {
             ref.read = true;
             ref.dimensions = (op->args.size() - 2) / 2;
-        } else if (op->is_intrinsic(Call::glsl_texture_store) ||
-                   op->is_intrinsic(Call::image_store)) {
+        } else if (op->is_intrinsic(Call::image_store)) {
             ref.write = true;
             ref.dimensions = op->args.size() - 3;
         }
diff --git a/src/DeviceInterface.cpp b/src/DeviceInterface.cpp
index 3285d7f3b3ac..4d72805e153a 100644
--- a/src/DeviceInterface.cpp
+++ b/src/DeviceInterface.cpp
@@ -96,8 +96,6 @@ const halide_device_interface_t *get_device_interface_for_device_api(DeviceAPI d
         name = "cuda";
     } else if (d == DeviceAPI::OpenGLCompute) {
         name = "openglcompute";
-    } else if (d == DeviceAPI::GLSL) {
-        name = "opengl";
     } else if (d == DeviceAPI::HexagonDma) {
         name = "hexagon_dma";
     } else if (d == DeviceAPI::D3D12Compute) {
@@ -152,8 +150,6 @@ DeviceAPI get_default_device_api_for_target(const Target &target) {
         return DeviceAPI::CUDA;
     } else if (target.has_feature(Target::OpenGLCompute)) {
         return DeviceAPI::OpenGLCompute;
-    } else if (target.has_feature(Target::OpenGL)) {
-        return DeviceAPI::GLSL;
     } else if (target.has_feature(Target::HexagonDma)) {
         return DeviceAPI::HexagonDma;
     } else if (target.has_feature(Target::D3D12Compute)) {
@@ -184,9 +180,6 @@ Expr make_device_interface_call(DeviceAPI device_api, MemoryType memory_type) {
     case DeviceAPI::Metal:
         interface_name = "halide_metal_device_interface";
         break;
-    case DeviceAPI::GLSL:
-        interface_name = "halide_opengl_device_interface";
-        break;
     case DeviceAPI::OpenGLCompute:
         interface_name = "halide_openglcompute_device_interface";
         break;
diff --git a/src/Func.cpp b/src/Func.cpp
index 0cbb5a563c2d..cc6ab6d2ea21 100644
--- a/src/Func.cpp
+++ b/src/Func.cpp
@@ -2454,35 +2454,6 @@ Func &Func::gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z,
     return *this;
 }
 
-Func &Func::shader(const Var &x, const Var &y, const Var &c, DeviceAPI device_api) {
-    invalidate_cache();
-
-    reorder(c, x, y);
-    // GLSL outputs must be stored interleaved
-    reorder_storage(c, x, y);
-
-    // TODO: Set appropriate constraints if this is the output buffer?
-
-    Stage(func, func.definition(), 0).gpu_blocks(x, y, device_api);
-
-    bool constant_bounds = false;
-    FuncSchedule &sched = func.schedule();
-    for (size_t i = 0; i < sched.bounds().size(); i++) {
-        if (c.name() == sched.bounds()[i].var) {
-            constant_bounds = is_const(sched.bounds()[i].min) &&
-                              is_const(sched.bounds()[i].extent);
-            break;
-        }
-    }
-    user_assert(constant_bounds)
-        << "The color channel for image loops must have constant bounds, e.g., .bound(c, 0, 3).\n";
-    return *this;
-}
-
-Func &Func::glsl(const Var &x, const Var &y, const Var &c) {
-    return shader(x, y, c, DeviceAPI::GLSL).vectorize(c);
-}
-
 Func &Func::hexagon(const VarOrRVar &x) {
     invalidate_cache();
     Stage(func, func.definition(), 0).hexagon(x);
diff --git a/src/Func.h b/src/Func.h
index a0c3e82e242a..d6cb36e2f35e 100644
--- a/src/Func.h
+++ b/src/Func.h
@@ -1974,16 +1974,6 @@ class Func {
                    DeviceAPI device_api = DeviceAPI::Default_GPU);
     // @}
 
-    /** Schedule for execution using coordinate-based hardware api.
-     * GLSL is an example of this. Conceptually, this is
-     * similar to parallelization over 'x' and 'y' (since GLSL shaders compute
-     * individual output pixels in parallel) and vectorization over 'c'
-     * (since GLSL/RS implicitly vectorizes the color channel). */
-    Func &shader(const Var &x, const Var &y, const Var &c, DeviceAPI device_api);
-
-    /** Schedule for execution as GLSL kernel. */
-    Func &glsl(const Var &x, const Var &y, const Var &c);
-
     /** Schedule for execution on Hexagon. When a loop is marked with
      * Hexagon, that loop is executed on a Hexagon DSP. */
     Func &hexagon(const VarOrRVar &x = Var::outermost());
diff --git a/src/FuseGPUThreadLoops.cpp b/src/FuseGPUThreadLoops.cpp
index 9faf0d7a41df..6b1798b25528 100644
--- a/src/FuseGPUThreadLoops.cpp
+++ b/src/FuseGPUThreadLoops.cpp
@@ -1442,10 +1442,6 @@ class FuseGPUThreadLoops : public IRMutator {
     using IRMutator::visit;
 
     Stmt visit(const For *op) override {
-        if (op->device_api == DeviceAPI::GLSL) {
-            return op;
-        }
-
         user_assert(!(CodeGen_GPU_Dev::is_gpu_thread_var(op->name)))
             << "Loops over GPU thread variable: \"" << op->name
             << "\" is outside of any loop over a GPU block variable. "
diff --git a/src/Generator.cpp b/src/Generator.cpp
index e732fca13889..e973885ee53d 100644
--- a/src/Generator.cpp
+++ b/src/Generator.cpp
@@ -757,7 +757,7 @@ std::string halide_type_to_c_type(const Type &t) {
 
 int generate_filter_main_inner(int argc, char **argv, std::ostream &cerr) {
     const char kUsage[] =
-        "gengen \n"
+        "gengen\n"
         "  [-g GENERATOR_NAME] [-f FUNCTION_NAME] [-o OUTPUT_DIR] [-r RUNTIME_NAME] [-d 1|0]\n"
         "  [-e EMIT_OPTIONS] [-n FILE_BASE_NAME] [-p PLUGIN_NAME] [-s AUTOSCHEDULER_NAME]\n"
         "       target=target-string[,target-string...] [generator_arg=value [...]]\n"
diff --git a/src/Generator.h b/src/Generator.h
index bc60b7ff4fb5..8edec2015961 100644
--- a/src/Generator.h
+++ b/src/Generator.h
@@ -2220,7 +2220,6 @@ class GeneratorOutputBase : public GIOBase {
     HALIDE_FORWARD_METHOD_CONST(Func, defined)
     HALIDE_FORWARD_METHOD(Func, fold_storage)
     HALIDE_FORWARD_METHOD(Func, fuse)
-    HALIDE_FORWARD_METHOD(Func, glsl)
     HALIDE_FORWARD_METHOD(Func, gpu)
     HALIDE_FORWARD_METHOD(Func, gpu_blocks)
     HALIDE_FORWARD_METHOD(Func, gpu_single_thread)
@@ -2242,7 +2241,6 @@ class GeneratorOutputBase : public GIOBase {
     HALIDE_FORWARD_METHOD_CONST(Func, rvars)
     HALIDE_FORWARD_METHOD(Func, serial)
     HALIDE_FORWARD_METHOD(Func, set_estimate)
-    HALIDE_FORWARD_METHOD(Func, shader)
     HALIDE_FORWARD_METHOD(Func, specialize)
     HALIDE_FORWARD_METHOD(Func, specialize_fail)
     HALIDE_FORWARD_METHOD(Func, split)
diff --git a/src/IR.cpp b/src/IR.cpp
index edc293b6ce71..96b56ed01fed 100644
--- a/src/IR.cpp
+++ b/src/IR.cpp
@@ -597,9 +597,6 @@ const char *const intrinsic_op_names[] = {
     "div_round_to_zero",
     "dynamic_shuffle",
     "extract_mask_element",
-    "glsl_texture_load",
-    "glsl_texture_store",
-    "glsl_varying",
     "gpu_thread_barrier",
     "hvx_gather",
     "hvx_scatter",
diff --git a/src/IR.h b/src/IR.h
index ce45483882e6..da57626e7c42 100644
--- a/src/IR.h
+++ b/src/IR.h
@@ -509,9 +509,6 @@ struct Call : public ExprNode<Call> {
         div_round_to_zero,
         dynamic_shuffle,
         extract_mask_element,
-        glsl_texture_load,
-        glsl_texture_store,
-        glsl_varying,
         gpu_thread_barrier,
         hvx_gather,
         hvx_scatter,
diff --git a/src/IRPrinter.cpp b/src/IRPrinter.cpp
index 79318513ca31..240005b9c8a8 100644
--- a/src/IRPrinter.cpp
+++ b/src/IRPrinter.cpp
@@ -93,9 +93,6 @@ ostream &operator<<(ostream &out, const DeviceAPI &api) {
     case DeviceAPI::OpenGLCompute:
         out << "<OpenGLCompute>";
         break;
-    case DeviceAPI::GLSL:
-        out << "<GLSL>";
-        break;
     case DeviceAPI::Metal:
         out << "<Metal>";
         break;
diff --git a/src/InjectOpenGLIntrinsics.cpp b/src/InjectOpenGLIntrinsics.cpp
deleted file mode 100644
index b9e1d8c3fa46..000000000000
--- a/src/InjectOpenGLIntrinsics.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-#include "InjectOpenGLIntrinsics.h"
-#include "CodeGen_GPU_Dev.h"
-#include "FuseGPUThreadLoops.h"
-#include "IRMutator.h"
-#include "IROperator.h"
-#include "Scope.h"
-#include "Substitute.h"
-
-namespace Halide {
-namespace Internal {
-
-using std::string;
-using std::vector;
-
-namespace {
-
-/** Normalizes image loads/stores and produces glsl_texture_load/stores. */
-class InjectOpenGLIntrinsics : public IRMutator {
-public:
-    InjectOpenGLIntrinsics() = default;
-    Scope<int> scope;
-    bool inside_kernel_loop = false;
-
-private:
-    using IRMutator::visit;
-
-    Expr visit(const Call *call) override {
-        if (call->is_intrinsic(Call::image_load)) {
-            vector<Expr> call_args = call->args;
-            //
-            // Create
-            //  glsl_texture_load("name",
-            //                    name.buffer,
-            //                    (x - x_min + 0.5)/x_extent,
-            //                    (y - y_min + 0.5)/y_extent,
-            //                    c)
-            // from
-            //  image_load("name",
-            //                   name.buffer,
-            //                   x - x_min, x_extent,
-            //                   y - y_min, y_extent,
-            //                   c - c_min, c_extent
-            //                   )
-            //
-            int dims = (call_args.size() - 2) / 2;
-            internal_assert(dims >= 1 && dims <= 3);
-
-            vector<Expr> args(5);
-            args[0] = call_args[0];  // "name"
-            args[1] = call_args[1];  // name.buffer
-
-            // Normalize first two coordinates.
-            for (int i = 0; i < std::min(dims, 2); i++) {
-                int to_index = 2 + i;
-                int from_index = 2 + i * 2;
-                args[to_index] =
-                    (Cast::make(Float(32), mutate(call_args[from_index])) + 0.5f) /
-                    mutate(call_args[from_index + 1]);
-            }
-
-            if (dims < 3) {
-                args[3] = FloatImm::make(Float(32), 0.5f);
-                args[4] = IntImm::make(Int(32), 0);
-            } else {
-                // Confirm that user explicitly specified constant value for min
-                // value of c dimension for ImageParams accessed by GLSL-based filters.
-                if (call->param.defined()) {
-                    bool const_min_constraint =
-                        call->param.min_constraint(2).defined() &&
-                        is_const(call->param.min_constraint(2));
-                    user_assert(const_min_constraint)
-                        << "GLSL: Requires minimum for c-dimension set to constant "
-                        << "for ImageParam '" << args[0] << "'. "
-                        << "Call set_min(2, min) or set_bounds(2, min, extent) to set.\n";
-                }
-
-                Expr c_coordinate = mutate(call_args[2 + 2 * 2]);
-                args[4] = c_coordinate;
-            }
-
-            return Call::make(call->type, Call::glsl_texture_load,
-                              vector<Expr>(&args[0], &args[5]),
-                              Call::Intrinsic, FunctionPtr(), 0,
-                              call->image, call->param);
-        } else if (call->is_intrinsic(Call::image_store)) {
-            user_assert(call->args.size() == 6)
-                << "GLSL stores require three coordinates.\n";
-
-            // Create
-            //    gl_texture_store(name, name.buffer, x, y, c, value)
-            // out of
-            //    image_store(name, name.buffer, x, y, c, value)
-            vector<Expr> args(call->args);
-            args[5] = mutate(call->args[5]);  // mutate value
-            return Call::make(call->type, Call::glsl_texture_store,
-                              args, Call::Intrinsic);
-        } else {
-            return IRMutator::visit(call);
-        }
-    }
-};
-
-}  // namespace
-
-Stmt inject_opengl_intrinsics(const Stmt &s) {
-    InjectOpenGLIntrinsics gl;
-    return gl.mutate(s);
-}
-
-}  // namespace Internal
-}  // namespace Halide
diff --git a/src/InjectOpenGLIntrinsics.h b/src/InjectOpenGLIntrinsics.h
deleted file mode 100644
index 3fcf9875d024..000000000000
--- a/src/InjectOpenGLIntrinsics.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef HALIDE_INJECT_OPENGL_INTRINSICS_H
-#define HALIDE_INJECT_OPENGL_INTRINSICS_H
-
-/** \file
- * Defines the lowering pass that injects texture loads and texture
- * stores for opengl.
- */
-
-#include "Expr.h"
-
-namespace Halide {
-namespace Internal {
-
-/** Take a statement with for kernel for loops and turn loads and
- * stores inside the loops into OpenGL texture load and store
- * intrinsics. Should only be run when the OpenGL target is active. */
-Stmt inject_opengl_intrinsics(const Stmt &s);
-
-}  // namespace Internal
-}  // namespace Halide
-
-#endif
diff --git a/src/JITModule.cpp b/src/JITModule.cpp
index dd67ca1d2dcd..0e3837ec6913 100644
--- a/src/JITModule.cpp
+++ b/src/JITModule.cpp
@@ -624,14 +624,12 @@ enum RuntimeKind {
     OpenCL,
     Metal,
     CUDA,
-    OpenGL,  // NOTE: this feature is deprecated and will be removed in Halide 12.
     OpenGLCompute,
     Hexagon,
     D3D12Compute,
     OpenCLDebug,
     MetalDebug,
     CUDADebug,
-    OpenGLDebug,  // NOTE: this feature is deprecated and will be removed in Halide 12.
     OpenGLComputeDebug,
     HexagonDebug,
     D3D12ComputeDebug,
@@ -668,7 +666,6 @@ JITModule &make_module(llvm::Module *for_module, Target target,
         one_gpu.set_feature(Target::Metal, false);
         one_gpu.set_feature(Target::CUDA, false);
         one_gpu.set_feature(Target::HVX, false);
-        one_gpu.set_feature(Target::OpenGL, false);
         one_gpu.set_feature(Target::OpenGLCompute, false);
         one_gpu.set_feature(Target::D3D12Compute, false);
         string module_name;
@@ -702,17 +699,6 @@ JITModule &make_module(llvm::Module *for_module, Target target,
             one_gpu.set_feature(Target::CUDA);
             module_name += "cuda";
             break;
-        case OpenGLDebug:
-            one_gpu.set_feature(Target::Debug);
-            one_gpu.set_feature(Target::OpenGL);
-            module_name = "debug_opengl";
-            load_opengl(one_gpu.has_feature(Target::EGL));
-            break;
-        case OpenGL:
-            one_gpu.set_feature(Target::OpenGL);
-            module_name += "opengl";
-            load_opengl(one_gpu.has_feature(Target::EGL));
-            break;
         case OpenGLComputeDebug:
             one_gpu.set_feature(Target::Debug);
             one_gpu.set_feature(Target::OpenGLCompute);
@@ -874,13 +860,6 @@ std::vector<JITModule> JITSharedRuntime::get(llvm::Module *for_module, const Tar
             result.push_back(m);
         }
     }
-    if (target.has_feature(Target::OpenGL)) {
-        auto kind = target.has_feature(Target::Debug) ? OpenGLDebug : OpenGL;
-        JITModule m = make_module(for_module, target, kind, result, create);
-        if (m.compiled()) {
-            result.push_back(m);
-        }
-    }
     if (target.has_feature(Target::OpenGLCompute)) {
         auto kind = target.has_feature(Target::Debug) ? OpenGLComputeDebug : OpenGLCompute;
         JITModule m = make_module(for_module, target, kind, result, create);
diff --git a/src/LICM.cpp b/src/LICM.cpp
index 64360fd64b9d..bb80691f79b0 100644
--- a/src/LICM.cpp
+++ b/src/LICM.cpp
@@ -246,9 +246,6 @@ class LICM : public IRMutator {
         if (old_in_gpu_loop && in_gpu_loop) {
             // Don't lift lets to in-between gpu blocks/threads
             return IRMutator::visit(op);
-        } else if (op->device_api == DeviceAPI::GLSL) {
-            // GLSL uses magic names for varying things. Just skip LICM.
-            return IRMutator::visit(op);
         } else {
 
             // Lift invariants
diff --git a/src/LLVM_Runtime_Linker.cpp b/src/LLVM_Runtime_Linker.cpp
index d715ed4e984d..75b5d5c212d4 100644
--- a/src/LLVM_Runtime_Linker.cpp
+++ b/src/LLVM_Runtime_Linker.cpp
@@ -105,7 +105,6 @@ DECLARE_CPP_INITMOD(module_jit_ref_count)
 DECLARE_CPP_INITMOD(msan)
 DECLARE_CPP_INITMOD(msan_stubs)
 DECLARE_CPP_INITMOD(opencl)
-DECLARE_CPP_INITMOD(opengl)
 DECLARE_CPP_INITMOD(openglcompute)
 DECLARE_CPP_INITMOD(opengl_egl_context)
 DECLARE_CPP_INITMOD(opengl_glx_context)
@@ -1091,22 +1090,6 @@ std::unique_ptr<llvm::Module> get_initial_module_for_target(Target t, llvm::LLVM
                 modules.push_back(get_initmod_opencl(c, bits_64, debug));
             }
         }
-        if (t.has_feature(Target::OpenGL)) {
-            modules.push_back(get_initmod_opengl(c, bits_64, debug));
-            if (t.os == Target::Linux) {
-                if (t.has_feature(Target::EGL)) {
-                    modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug));
-                } else {
-                    modules.push_back(get_initmod_opengl_glx_context(c, bits_64, debug));
-                }
-            } else if (t.os == Target::OSX) {
-                modules.push_back(get_initmod_osx_opengl_context(c, bits_64, debug));
-            } else if (t.os == Target::Android) {
-                modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug));
-            } else {
-                // You're on your own to provide definitions of halide_opengl_get_proc_address and halide_opengl_create_context
-            }
-        }
         if (t.has_feature(Target::OpenGLCompute)) {
             modules.push_back(get_initmod_openglcompute(c, bits_64, debug));
             if (t.os == Target::Android) {
diff --git a/src/Lower.cpp b/src/Lower.cpp
index a25d18de30b3..35f2096addd2 100644
--- a/src/Lower.cpp
+++ b/src/Lower.cpp
@@ -34,7 +34,6 @@
 #include "IRPrinter.h"
 #include "InferArguments.h"
 #include "InjectHostDevBufferCopies.h"
-#include "InjectOpenGLIntrinsics.h"
 #include "Inline.h"
 #include "LICM.h"
 #include "LoopCarry.h"
@@ -68,7 +67,6 @@
 #include "UnpackBuffers.h"
 #include "UnrollLoops.h"
 #include "UnsafePromises.h"
-#include "VaryingAttributes.h"
 #include "VectorizeLoops.h"
 #include "WrapCalls.h"
 
@@ -205,7 +203,6 @@ Module lower(const vector<Function> &output_funcs,
     bool will_inject_host_copies =
         (t.has_gpu_feature() ||
          t.has_feature(Target::OpenGLCompute) ||
-         t.has_feature(Target::OpenGL) ||
          t.has_feature(Target::HexagonDma) ||
          (t.arch != Target::Hexagon && (t.has_feature(Target::HVX))));
 
@@ -257,8 +254,7 @@ Module lower(const vector<Function> &output_funcs,
     // OpenGL relies on GPU var canonicalization occurring before
     // storage flattening.
     if (t.has_gpu_feature() ||
-        t.has_feature(Target::OpenGLCompute) ||
-        t.has_feature(Target::OpenGL)) {
+        t.has_feature(Target::OpenGLCompute)) {
         debug(1) << "Canonicalizing GPU var names...\n";
         s = canonicalize_gpu_vars(s);
         debug(2) << "Lowering after canonicalizing GPU var names:\n"
@@ -312,13 +308,6 @@ Module lower(const vector<Function> &output_funcs,
                  << s << "\n\n";
     }
 
-    if (t.has_feature(Target::OpenGL)) {
-        debug(1) << "Injecting OpenGL texture intrinsics...\n";
-        s = inject_opengl_intrinsics(s);
-        debug(2) << "Lowering after OpenGL intrinsics:\n"
-                 << s << "\n\n";
-    }
-
     debug(1) << "Simplifying...\n";
     s = simplify(s);
     s = unify_duplicate_lets(s);
@@ -416,18 +405,6 @@ Module lower(const vector<Function> &output_funcs,
     debug(1) << "Simplifying...\n";
     s = common_subexpression_elimination(s);
 
-    if (t.has_feature(Target::OpenGL)) {
-        debug(1) << "Detecting varying attributes...\n";
-        s = find_linear_expressions(s);
-        debug(2) << "Lowering after detecting varying attributes:\n"
-                 << s << "\n\n";
-
-        debug(1) << "Moving varying attribute expressions out of the shader...\n";
-        s = setup_gpu_vertex_buffer(s);
-        debug(2) << "Lowering after removing varying attributes:\n"
-                 << s << "\n\n";
-    }
-
     debug(1) << "Lowering unsafe promises...\n";
     s = lower_unsafe_promises(s, t);
     debug(2) << "Lowering after lowering unsafe promises:\n"
diff --git a/src/Module.cpp b/src/Module.cpp
index 618c8ea7eb39..119635535e69 100644
--- a/src/Module.cpp
+++ b/src/Module.cpp
@@ -557,10 +557,6 @@ std::map<std::string, std::string> Module::get_metadata_name_map() const {
 void Module::compile(const std::map<Output, std::string> &output_files) const {
     validate_outputs(output_files);
 
-    if (target().has_feature(Target::OpenGL)) {
-        user_warning << "WARNING: OpenGL is deprecated in Halide 11 and will be removed in Halide 12.\n";
-    }
-
     // output stmt and html prior to resolving submodules. We need to
     // clear the output after writing it, otherwise the output will
     // be overwritten by recursive calls after submodules are resolved.
diff --git a/src/PartitionLoops.cpp b/src/PartitionLoops.cpp
index dfe9a30d1931..0a5381972000 100644
--- a/src/PartitionLoops.cpp
+++ b/src/PartitionLoops.cpp
@@ -496,12 +496,6 @@ class PartitionLoops : public IRMutator {
             return IRMutator::visit(op);
         }
 
-        // We shouldn't partition GLSL loops - they have control-flow
-        // constraints.
-        if (op->device_api == DeviceAPI::GLSL) {
-            return op;
-        }
-
         // Find simplifications in this loop body
         FindSimplifications finder(op->name);
         body.accept(&finder);
@@ -777,11 +771,6 @@ class RenormalizeGPULoops : public IRMutator {
     vector<pair<string, Expr>> lifted_lets;
 
     Stmt visit(const For *op) override {
-        if (op->device_api == DeviceAPI::GLSL) {
-            // The partitioner did not enter GLSL loops
-            return op;
-        }
-
         bool old_in_gpu_loop = in_gpu_loop;
         Stmt stmt;
 
diff --git a/src/Pipeline.cpp b/src/Pipeline.cpp
index 349a94c22ea2..b46b879b88a6 100644
--- a/src/Pipeline.cpp
+++ b/src/Pipeline.cpp
@@ -1075,10 +1075,6 @@ void Pipeline::realize(RealizationArg outputs, const Target &t,
     Target target = t;
     user_assert(defined()) << "Can't realize an undefined Pipeline\n";
 
-    if (t.has_feature(Target::OpenGL)) {
-        user_warning << "WARNING: OpenGL is deprecated in Halide 11 and will be removed in Halide 12.\n";
-    }
-
     debug(2) << "Realizing Pipeline for " << target << "\n";
 
     if (target.has_unknowns()) {
diff --git a/src/StorageFlattening.cpp b/src/StorageFlattening.cpp
index d78662fe73b2..2ed7fa278939 100644
--- a/src/StorageFlattening.cpp
+++ b/src/StorageFlattening.cpp
@@ -39,8 +39,7 @@ class FlattenDimensions : public IRMutator {
     set<string> outputs;
     set<string> textures;
     const Target &target;
-    Scope<> realizations, shader_scope_realizations;
-    bool in_shader = false;
+    Scope<> realizations;
     bool in_gpu = false;
 
     Expr make_shape_var(string name, const string &field, size_t dim,
@@ -110,10 +109,6 @@ class FlattenDimensions : public IRMutator {
     Stmt visit(const Realize *op) override {
         realizations.push(op->name);
 
-        if (in_shader) {
-            shader_scope_realizations.push(op->name);
-        }
-
         if (op->memory_type == MemoryType::GPUTexture) {
             textures.insert(op->name);
             debug(2) << "found texture " << op->name << "\n";
@@ -131,10 +126,6 @@ class FlattenDimensions : public IRMutator {
 
         realizations.pop(op->name);
 
-        if (in_shader) {
-            shader_scope_realizations.pop(op->name);
-        }
-
         // The allocation extents of the function taken into account of
         // the align_storage directives. It is only used to determine the
         // host allocation size and the strides in halide_buffer_t objects (which
@@ -247,19 +238,7 @@ class FlattenDimensions : public IRMutator {
         }
 
         Expr value = mutate(op->values[0]);
-        if (in_shader && !shader_scope_realizations.contains(op->name)) {
-            user_assert(op->args.size() == 3)
-                << "Image stores require three coordinates.\n";
-            Expr buffer_var =
-                Variable::make(type_of<halide_buffer_t *>(), op->name + ".buffer", output_buf);
-            vector<Expr> args = {
-                op->name, buffer_var,
-                op->args[0], op->args[1], op->args[2],
-                value};
-            Expr store = Call::make(value.type(), Call::image_store,
-                                    args, Call::Intrinsic);
-            return Evaluate::make(store);
-        } else if (in_gpu && textures.count(op->name)) {
+        if (in_gpu && textures.count(op->name)) {
             Expr buffer_var =
                 Variable::make(type_of<halide_buffer_t *>(), op->name + ".buffer", output_buf);
             vector<Expr> args(2);
@@ -296,7 +275,7 @@ class FlattenDimensions : public IRMutator {
 
             internal_assert(op->value_index == 0);
 
-            if ((in_shader && !shader_scope_realizations.contains(op->name)) || (in_gpu && textures.count(op->name))) {
+            if (in_gpu && textures.count(op->name)) {
                 ReductionDomain rdom;
                 Expr buffer_var =
                     Variable::make(type_of<halide_buffer_t *>(), op->name + ".buffer",
@@ -396,19 +375,12 @@ class FlattenDimensions : public IRMutator {
     }
 
     Stmt visit(const For *op) override {
-        bool old_in_shader = in_shader;
         bool old_in_gpu = in_gpu;
-        if ((op->for_type == ForType::GPUBlock ||
-             op->for_type == ForType::GPUThread) &&
-            op->device_api == DeviceAPI::GLSL) {
-            in_shader = true;
-        }
         if (op->for_type == ForType::GPUBlock ||
             op->for_type == ForType::GPUThread) {
             in_gpu = true;
         }
         Stmt stmt = IRMutator::visit(op);
-        in_shader = old_in_shader;
         in_gpu = old_in_gpu;
         return stmt;
     }
diff --git a/src/Target.cpp b/src/Target.cpp
index ac1e6548ce8b..c9ea844729c2 100644
--- a/src/Target.cpp
+++ b/src/Target.cpp
@@ -330,7 +330,6 @@ const std::map<std::string, Target::Feature> feature_name_map = {
     {"cl_doubles", Target::CLDoubles},
     {"cl_half", Target::CLHalf},
     {"cl_atomics64", Target::CLAtomics64},
-    {"opengl", Target::OpenGL},
     {"openglcompute", Target::OpenGLCompute},
     {"egl", Target::EGL},
     {"user_context", Target::UserContext},
@@ -661,7 +660,7 @@ bool Target::supported() const {
     bad |= has_feature(Target::Metal);
 #endif
 #if !defined(WITH_OPENGL)
-    bad |= has_feature(Target::OpenGL) || has_feature(Target::OpenGLCompute);
+    bad |= has_feature(Target::OpenGLCompute);
 #endif
 #if !defined(WITH_D3D12)
     bad |= has_feature(Target::D3D12Compute);
@@ -774,14 +773,12 @@ bool Target::supports_type(const Type &t) const {
     if (t.bits() == 64) {
         if (t.is_float()) {
             return !has_feature(Metal) &&
-                   !has_feature(OpenGL) &&
                    !has_feature(OpenGLCompute) &&
                    !has_feature(D3D12Compute) &&
                    (!has_feature(Target::OpenCL) || has_feature(Target::CLDoubles));
         } else {
             return (!has_feature(Metal) &&
                     !has_feature(OpenGLCompute) &&
-                    !has_feature(OpenGL) &&
                     !has_feature(D3D12Compute));
         }
     }
@@ -854,9 +851,6 @@ DeviceAPI Target::get_required_device_api() const {
     if (has_feature(Target::OpenCL)) {
         return DeviceAPI::OpenCL;
     }
-    if (has_feature(Target::OpenGL)) {
-        return DeviceAPI::GLSL;
-    }
     if (has_feature(Target::OpenGLCompute)) {
         return DeviceAPI::OpenGLCompute;
     }
@@ -869,8 +863,6 @@ Target::Feature target_feature_for_device_api(DeviceAPI api) {
         return Target::CUDA;
     case DeviceAPI::OpenCL:
         return Target::OpenCL;
-    case DeviceAPI::GLSL:
-        return Target::OpenGL;
     case DeviceAPI::OpenGLCompute:
         return Target::OpenGLCompute;
     case DeviceAPI::Metal:
@@ -957,7 +949,6 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result)
         Metal,
         NoNEON,
         OpenCL,
-        OpenGL,
         OpenGLCompute,
 
         // These features are actually intersection-y, but because targets only record the _highest_,
@@ -1123,7 +1114,6 @@ void target_test() {
         {{"x86-64-linux-cuda", "x86-64-linux", "x86-64-linux-cuda"}},
         {{"x86-64-linux-cuda-cuda_capability_50", "x86-64-linux-cuda", "x86-64-linux-cuda"}},
         {{"x86-64-linux-cuda-cuda_capability_50", "x86-64-linux-cuda-cuda_capability_30", "x86-64-linux-cuda-cuda_capability_30"}},
-        {{"x86-64-linux-cuda", "x86-64-linux-opengl", "x86-64-linux-cuda-opengl"}},
         {{"hexagon-32-qurt-hvx_v65", "hexagon-32-qurt-hvx_v62", "hexagon-32-qurt-hvx_v62"}},
         {{"hexagon-32-qurt-hvx_v62", "hexagon-32-qurt", "hexagon-32-qurt"}},
         {{"hexagon-32-qurt-hvx_v62-hvx", "hexagon-32-qurt", ""}},
diff --git a/src/Target.h b/src/Target.h
index 2ae94a71c3a0..84aeb07e0e87 100644
--- a/src/Target.h
+++ b/src/Target.h
@@ -82,7 +82,6 @@ struct Target {
         CLDoubles = halide_target_feature_cl_doubles,
         CLHalf = halide_target_feature_cl_half,
         CLAtomics64 = halide_target_feature_cl_atomic64,
-        OpenGL = halide_target_feature_opengl,  // NOTE: this feature is deprecated and will be removed in Halide 12.
         OpenGLCompute = halide_target_feature_openglcompute,
         EGL = halide_target_feature_egl,
         UserContext = halide_target_feature_user_context,
diff --git a/src/VaryingAttributes.cpp b/src/VaryingAttributes.cpp
deleted file mode 100644
index df9ebf94f6b1..000000000000
--- a/src/VaryingAttributes.cpp
+++ /dev/null
@@ -1,1389 +0,0 @@
-#include "VaryingAttributes.h"
-
-#include <algorithm>
-
-#include "CSE.h"
-#include "CodeGen_GPU_Dev.h"
-#include "IR.h"
-#include "IRMutator.h"
-#include "IROperator.h"
-#include "Simplify.h"
-
-namespace Halide {
-namespace Internal {
-
-namespace {
-
-Stmt make_block(Stmt first, Stmt rest) {
-    if (first.defined() && rest.defined()) {
-        return Block::make(first, rest);
-    } else if (first.defined()) {
-        return first;
-    } else {
-        return rest;
-    }
-}
-
-// Find expressions that we can evaluate with interpolation hardware in the GPU
-//
-// This visitor keeps track of the "order" of the expression in terms of the
-// specified variables. The order value 0 means that the expression is contant;
-// order value 1 means that it is linear in terms of only one variable, check
-// the member found to determine which; order value 2 means non-linear, it
-// could be disqualified due to being quadratic, bilinear or the result of an
-// unknown function.
-class FindLinearExpressions : public IRMutator {
-protected:
-    using IRMutator::visit;
-
-    bool in_glsl_loops = false;
-
-    Expr tag_linear_expression(Expr e, const std::string &name = unique_name('a')) {
-
-        internal_assert(name.length() > 0);
-
-        if (total_found >= max_expressions) {
-            return e;
-        }
-
-        // Wrap the expression with an intrinsic to tag that it is a varying
-        // attribute. These tagged variables will be pulled out of the fragment
-        // shader during a subsequent pass
-        Expr intrinsic = Call::make(e.type(), Call::glsl_varying,
-                                    {name + ".varying", e},
-                                    Call::Intrinsic);
-        ++total_found;
-
-        return intrinsic;
-    }
-
-    Expr visit(const Call *op) override {
-        std::vector<Expr> new_args = op->args;
-
-        // Check to see if this call is a load
-        if (op->is_intrinsic(Call::glsl_texture_load)) {
-            // Check if the texture coordinate arguments are linear wrt the GPU
-            // loop variables
-            internal_assert(!loop_vars.empty()) << "No GPU loop variables found at texture load\n";
-
-            // Iterate over the texture coordinate arguments
-            for (int i = 2; i != 4; ++i) {
-                new_args[i] = mutate(op->args[i]);
-                if (order == 1) {
-                    new_args[i] = tag_linear_expression(new_args[i]);
-                }
-            }
-        } else if (op->is_intrinsic(Call::glsl_texture_store)) {
-            // Check if the value expression is linear wrt the loop variables
-            internal_assert(!loop_vars.empty()) << "No GPU loop variables found at texture store\n";
-
-            // The value is the 5th argument to the intrinsic
-            new_args[5] = mutate(new_args[5]);
-            if (order == 1) {
-                new_args[5] = tag_linear_expression(new_args[5]);
-            }
-        }
-
-        // The texture lookup itself is counted as a non-linear operation
-        order = 2;
-        return Call::make(op->type, op->name, new_args, op->call_type,
-                          op->func, op->value_index, op->image, op->param);
-    }
-
-    Expr visit(const Let *op) override {
-        Expr mutated_value = mutate(op->value);
-        int value_order = order;
-
-        ScopedBinding<int> bind(scope, op->name, order);
-
-        Expr mutated_body = mutate(op->body);
-
-        if ((value_order == 1) && (total_found < max_expressions)) {
-            // Wrap the let value with a varying tag
-            mutated_value = Call::make(mutated_value.type(), Call::glsl_varying,
-                                       {op->name + ".varying", mutated_value},
-                                       Call::Intrinsic);
-            ++total_found;
-        }
-
-        return Let::make(op->name, mutated_value, mutated_body);
-    }
-
-    Stmt visit(const For *op) override {
-        bool old_in_glsl_loops = in_glsl_loops;
-        bool kernel_loop = op->device_api == DeviceAPI::GLSL;
-        bool within_kernel_loop = !kernel_loop && in_glsl_loops;
-        // Check if the loop variable is a GPU variable thread variable and for GLSL
-        if (kernel_loop) {
-            loop_vars.push_back(op->name);
-            in_glsl_loops = true;
-        } else if (within_kernel_loop) {
-            // The inner loop variable is non-linear w.r.t the glsl pixel coordinate.
-            scope.push(op->name, 2);
-        }
-
-        Stmt mutated_body = mutate(op->body);
-
-        if (kernel_loop) {
-            loop_vars.pop_back();
-        } else if (within_kernel_loop) {
-            scope.pop(op->name);
-        }
-
-        in_glsl_loops = old_in_glsl_loops;
-
-        if (mutated_body.same_as(op->body)) {
-            return op;
-        } else {
-            return For::make(op->name, op->min, op->extent, op->for_type, op->device_api, mutated_body);
-        }
-    }
-
-    Expr visit(const Variable *op) override {
-        if (std::find(loop_vars.begin(), loop_vars.end(), op->name) != loop_vars.end()) {
-            order = 1;
-        } else if (scope.contains(op->name)) {
-            order = scope.get(op->name);
-        } else {
-            // If the variable is not found in scope, then we assume it is
-            // constant in terms of the independent variables.
-            order = 0;
-        }
-        return op;
-    }
-
-    Expr visit(const IntImm *op) override {
-        order = 0;
-        return op;
-    }
-    Expr visit(const UIntImm *op) override {
-        order = 0;
-        return op;
-    }
-    Expr visit(const FloatImm *op) override {
-        order = 0;
-        return op;
-    }
-    Expr visit(const StringImm *op) override {
-        order = 0;
-        return op;
-    }
-
-    Expr visit(const Cast *op) override {
-
-        Expr mutated_value = mutate(op->value);
-        int value_order = order;
-
-        // We can only interpolate float values, disqualify the expression if
-        // this is a cast to a different type
-        if (order && (!op->type.is_float())) {
-            order = 2;
-        }
-
-        if ((order > 1) && (value_order == 1)) {
-            mutated_value = tag_linear_expression(mutated_value);
-        }
-
-        return Cast::make(op->type, mutated_value);
-    }
-
-    // Add and subtract do not make the expression non-linear, if it is already
-    // linear or constant
-    template<typename T>
-    Expr visit_binary_linear(T *op) {
-        Expr a = mutate(op->a);
-        unsigned int order_a = order;
-        Expr b = mutate(op->b);
-        unsigned int order_b = order;
-
-        order = std::max(order_a, order_b);
-
-        // If the whole expression is greater than linear, check to see if
-        // either argument is linear and if so, add it to a candidate list
-        if ((order > 1) && (order_a == 1)) {
-            a = tag_linear_expression(a);
-        }
-        if ((order > 1) && (order_b == 1)) {
-            b = tag_linear_expression(b);
-        }
-
-        return T::make(a, b);
-    }
-
-    Expr visit(const Add *op) override {
-        return visit_binary_linear(op);
-    }
-    Expr visit(const Sub *op) override {
-        return visit_binary_linear(op);
-    }
-
-    // Multiplying increases the order of the expression, possibly making it
-    // non-linear
-    Expr visit(const Mul *op) override {
-        Expr a = mutate(op->a);
-        unsigned int order_a = order;
-        Expr b = mutate(op->b);
-        unsigned int order_b = order;
-
-        order = order_a + order_b;
-
-        // If the whole expression is greater than linear, check to see if
-        // either argument is linear and if so, add it to a candidate list
-        if ((order > 1) && (order_a == 1)) {
-            a = tag_linear_expression(a);
-        }
-        if ((order > 1) && (order_b == 1)) {
-            b = tag_linear_expression(b);
-        }
-
-        return Mul::make(a, b);
-    }
-
-    // Dividing is either multiplying by a constant, or makes the result
-    // non-linear (i.e. order -1)
-    Expr visit(const Div *op) override {
-        Expr a = mutate(op->a);
-        unsigned int order_a = order;
-        Expr b = mutate(op->b);
-        unsigned int order_b = order;
-
-        if (order_a && !order_b) {
-            // Case: x / c
-            order = order_a;
-        } else if (!order_a && order_b) {
-            // Case: c / x
-            order = 2;
-        } else {
-            order = order_a + order_b;
-        }
-
-        if ((order > 1) && (order_a == 1)) {
-            a = tag_linear_expression(a);
-        }
-        if ((order > 1) && (order_b == 1)) {
-            b = tag_linear_expression(b);
-        }
-
-        return Div::make(a, b);
-    }
-
-    // For other binary operators, if either argument is non-constant, then the
-    // whole expression is non-linear
-    template<typename T>
-    Expr visit_binary(T *op) {
-
-        Expr a = mutate(op->a);
-        unsigned int order_a = order;
-        Expr b = mutate(op->b);
-        unsigned int order_b = order;
-
-        if (order_a || order_b) {
-            order = 2;
-        }
-
-        if ((order > 1) && (order_a == 1)) {
-            a = tag_linear_expression(a);
-        }
-        if ((order > 1) && (order_b == 1)) {
-            b = tag_linear_expression(b);
-        }
-
-        return T::make(a, b);
-    }
-
-    Expr visit(const Mod *op) override {
-        return visit_binary(op);
-    }
-
-    // Break the expression into a piecewise function, if the expressions are
-    // linear, we treat the piecewise behavior specially during codegen
-
-    // Once this is done, Min and Max should call visit_binary_linear and the code
-    // in setup_mesh will handle piecewise linear behavior introduced by these
-    // expressions
-    Expr visit(const Min *op) override {
-        return visit_binary(op);
-    }
-    Expr visit(const Max *op) override {
-        return visit_binary(op);
-    }
-
-    Expr visit(const EQ *op) override {
-        return visit_binary(op);
-    }
-    Expr visit(const NE *op) override {
-        return visit_binary(op);
-    }
-    Expr visit(const LT *op) override {
-        return visit_binary(op);
-    }
-    Expr visit(const LE *op) override {
-        return visit_binary(op);
-    }
-    Expr visit(const GT *op) override {
-        return visit_binary(op);
-    }
-    Expr visit(const GE *op) override {
-        return visit_binary(op);
-    }
-    Expr visit(const And *op) override {
-        return visit_binary(op);
-    }
-    Expr visit(const Or *op) override {
-        return visit_binary(op);
-    }
-
-    Expr visit(const Not *op) override {
-        Expr a = mutate(op->a);
-        unsigned int order_a = order;
-
-        if (order_a) {
-            order = 2;
-        }
-
-        return Not::make(a);
-    }
-
-    Expr visit(const Broadcast *op) override {
-        Expr a = mutate(op->value);
-
-        if (order == 1) {
-            a = tag_linear_expression(a);
-        }
-
-        if (order) {
-            order = 2;
-        }
-
-        return Broadcast::make(a, op->lanes);
-    }
-
-    Expr visit(const Select *op) override {
-
-        // If either the true expression or the false expression is non-linear
-        // in terms of the loop variables, then the select expression might
-        // evaluate to a non-linear expression and is disqualified.
-
-        // If both are either linear or constant, and the condition expression
-        // is constant with respect to the loop variables, then either the true
-        // or false expression will be evaluated across the whole loop domain,
-        // and the select expression is linear. Otherwise, the expression is
-        // disqualified.
-
-        // The condition expression must be constant (order == 0) with respect
-        // to the loop variables.
-        Expr mutated_condition = mutate(op->condition);
-        int condition_order = (order != 0) ? 2 : 0;
-
-        Expr mutated_true_value = mutate(op->true_value);
-        int true_value_order = order;
-
-        Expr mutated_false_value = mutate(op->false_value);
-        int false_value_order = order;
-
-        order = std::max(std::max(condition_order, true_value_order), false_value_order);
-
-        if ((order > 1) && (condition_order == 1)) {
-            mutated_condition = tag_linear_expression(mutated_condition);
-        }
-        if ((order > 1) && (true_value_order == 1)) {
-            mutated_true_value = tag_linear_expression(mutated_true_value);
-        }
-        if ((order > 1) && (false_value_order == 1)) {
-            mutated_false_value = tag_linear_expression(mutated_false_value);
-        }
-
-        return Select::make(mutated_condition, mutated_true_value, mutated_false_value);
-    }
-
-public:
-    std::vector<std::string> loop_vars;
-
-    Scope<int> scope;
-
-    unsigned int order;
-    bool found;
-
-    unsigned int total_found = 0;
-
-    // This parameter controls the maximum number of linearly varying
-    // expressions halide will pull out of the fragment shader and evaluate per
-    // vertex, and allow the GPU to linearly interpolate across the domain. For
-    // OpenGL ES 2.0 we can pass 16 vec4 varying attributes, or 64 scalars. Two
-    // scalar slots are used by boilerplate code to pass pixel coordinates.
-    const unsigned int max_expressions = 62;
-
-    FindLinearExpressions() = default;
-};
-
-}  // namespace
-
-Stmt find_linear_expressions(const Stmt &s) {
-
-    return FindLinearExpressions().mutate(s);
-}
-
-namespace {
-
-// This visitor produces a map containing name and expression pairs from varying
-// tagged intrinsics
-class FindVaryingAttributeTags : public IRVisitor {
-public:
-    FindVaryingAttributeTags(std::map<std::string, Expr> &varyings_)
-        : varyings(varyings_) {
-    }
-
-    using IRVisitor::visit;
-
-    void visit(const Call *op) override {
-        if (op->is_intrinsic(Call::glsl_varying)) {
-            std::string name = op->args[0].as<StringImm>()->value;
-            varyings[name] = op->args[1];
-        }
-        IRVisitor::visit(op);
-    }
-
-    std::map<std::string, Expr> &varyings;
-};
-
-// This visitor removes glsl_varying intrinsics.
-class RemoveVaryingAttributeTags : public IRMutator {
-public:
-    using IRMutator::visit;
-
-    Expr visit(const Call *op) override {
-        if (op->is_intrinsic(Call::glsl_varying)) {
-            // Replace the call expression with its wrapped argument expression
-            return op->args[1];
-        } else {
-            return IRMutator::visit(op);
-        }
-    }
-};
-
-}  // namespace
-
-Stmt remove_varying_attributes(const Stmt &s) {
-    return RemoveVaryingAttributeTags().mutate(s);
-}
-
-namespace {
-
-// This visitor removes glsl_varying intrinsics and replaces them with
-// variables. After this visitor is called, the varying attribute expressions
-// will no longer appear in the IR tree, only variables with the .varying tag
-// will remain.
-class ReplaceVaryingAttributeTags : public IRMutator {
-public:
-    using IRMutator::visit;
-
-    Expr visit(const Call *op) override {
-        if (op->is_intrinsic(Call::glsl_varying)) {
-            // Replace the intrinsic tag wrapper with a variable the variable
-            // name ends with the tag ".varying"
-            std::string name = op->args[0].as<StringImm>()->value;
-
-            internal_assert(ends_with(name, ".varying"));
-
-            return Variable::make(op->type, name);
-        } else {
-            return IRMutator::visit(op);
-        }
-    }
-};
-
-}  // namespace
-
-Stmt replace_varying_attributes(const Stmt &s) {
-    return ReplaceVaryingAttributeTags().mutate(s);
-}
-
-namespace {
-
-// This visitor produces a set of variable names that are tagged with
-// ".varying".
-class FindVaryingAttributeVars : public IRVisitor {
-public:
-    using IRVisitor::visit;
-
-    void visit(const Variable *op) override {
-        if (ends_with(op->name, ".varying")) {
-            variables.insert(op->name);
-        }
-    }
-
-    std::set<std::string> variables;
-};
-
-}  // namespace
-
-// Remove varying attributes from the varying's map if they do not appear in the
-// loop_stmt because they were simplified away.
-void prune_varying_attributes(const Stmt &loop_stmt, std::map<std::string, Expr> &varying) {
-    FindVaryingAttributeVars find;
-    loop_stmt.accept(&find);
-
-    std::vector<std::string> remove_list;
-
-    for (const std::pair<const std::string, Expr> &i : varying) {
-        const std::string &name = i.first;
-        if (find.variables.find(name) == find.variables.end()) {
-            debug(2) << "Removed varying attribute " << name << "\n";
-            remove_list.push_back(name);
-        }
-    }
-
-    for (const std::string &i : remove_list) {
-        varying.erase(i);
-    }
-}
-
-namespace {
-
-// This visitor changes the type of variables tagged with .varying to float,
-// since GLSL will only interpolate floats. In the case that the type of the
-// varying attribute was integer, the interpolated float value is snapped to the
-// integer grid and cast to the integer type. This case occurs with coordinate
-// expressions where the integer loop variables are manipulated without being
-// converted to floating point. In other cases, like an affine transformation of
-// image coordinates, the loop variables are cast to floating point within the
-// interpolated expression.
-class CastVaryingVariables : public IRMutator {
-protected:
-    using IRMutator::visit;
-
-    Expr visit(const Variable *op) override {
-        if ((ends_with(op->name, ".varying")) && (op->type != Float(32))) {
-            // The incoming variable will be float type because GLSL only
-            // interpolates floats
-            Expr v = Variable::make(Float(32), op->name);
-
-            // If the varying attribute expression that this variable replaced
-            // was integer type, snap the interpolated floating point variable
-            // back to the integer grid.
-            return Cast::make(op->type, floor(v + 0.5f));
-        } else {
-            // Otherwise, the variable keeps its float type.
-            return op;
-        }
-    }
-};
-
-// This visitor casts the named variables to float, and then propagates the
-// float type through the expression. The variable is offset by 0.5f
-class CastVariablesToFloatAndOffset : public IRMutator {
-protected:
-    using IRMutator::visit;
-
-    Expr visit(const Variable *op) override {
-
-        // Check to see if the variable matches a loop variable name
-        if (std::find(names.begin(), names.end(), op->name) != names.end()) {
-            // This case is used by integer type loop variables. They are cast
-            // to float and offset.
-            return Expr(op) - 0.5f;
-
-        } else if (scope.contains(op->name) && (op->type != scope.get(op->name).type())) {
-            // Otherwise, check to see if it is defined by a modified let
-            // expression and if so, change the type of the variable to match
-            // the modified expression
-            return Variable::make(scope.get(op->name).type(), op->name);
-        } else {
-            return op;
-        }
-    }
-
-    Type float_type(const Expr &e) {
-        return Float(e.type().bits(), e.type().lanes());
-    }
-
-    template<typename T>
-    Expr visit_binary_op(const T *op) {
-        Expr mutated_a = mutate(op->a);
-        Expr mutated_b = mutate(op->b);
-
-        bool a_float = mutated_a.type().is_float();
-        bool b_float = mutated_b.type().is_float();
-
-        // If either argument is a float, then make sure both are float
-        if (a_float || b_float) {
-            if (!a_float) {
-                mutated_a = Cast::make(float_type(op->b), mutated_a);
-            }
-            if (!b_float) {
-                mutated_b = Cast::make(float_type(op->a), mutated_b);
-            }
-        }
-
-        return T::make(mutated_a, mutated_b);
-    }
-
-    Expr visit(const Add *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const Sub *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const Mul *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const Div *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const Mod *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const Min *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const Max *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const EQ *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const NE *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const LT *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const LE *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const GT *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const GE *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const And *op) override {
-        return visit_binary_op(op);
-    }
-    Expr visit(const Or *op) override {
-        return visit_binary_op(op);
-    }
-
-    Expr visit(const Select *op) override {
-        Expr mutated_condition = mutate(op->condition);
-        Expr mutated_true_value = mutate(op->true_value);
-        Expr mutated_false_value = mutate(op->false_value);
-
-        bool t_float = mutated_true_value.type().is_float();
-        bool f_float = mutated_false_value.type().is_float();
-
-        // If either argument is a float, then make sure both are float
-        if (t_float || f_float) {
-            if (!t_float) {
-                mutated_true_value = Cast::make(float_type(op->true_value), mutated_true_value);
-            }
-            if (!f_float) {
-                mutated_false_value = Cast::make(float_type(op->false_value), mutated_false_value);
-            }
-        }
-
-        return Select::make(mutated_condition, mutated_true_value, mutated_false_value);
-    }
-
-    Expr visit(const Ramp *op) override {
-        Expr mutated_base = mutate(op->base);
-        Expr mutated_stride = mutate(op->stride);
-
-        // If either base or stride is a float, then make sure both are float
-        bool base_float = mutated_base.type().is_float();
-        bool stride_float = mutated_stride.type().is_float();
-        if (!base_float && stride_float) {
-            mutated_base = Cast::make(float_type(op->base), mutated_base);
-        } else if (base_float && !stride_float) {
-            mutated_stride = Cast::make(float_type(op->stride), mutated_stride);
-        }
-
-        if (mutated_base.same_as(op->base) && mutated_stride.same_as(op->stride)) {
-            return op;
-        } else {
-            return Ramp::make(mutated_base, mutated_stride, op->lanes);
-        }
-    }
-
-    Expr visit(const Let *op) override {
-        Expr mutated_value = mutate(op->value);
-
-        bool changed = op->value.type().is_float() != mutated_value.type().is_float();
-        if (changed) {
-            scope.push(op->name, mutated_value);
-        }
-
-        Expr mutated_body = mutate(op->body);
-
-        if (changed) {
-            scope.pop(op->name);
-        }
-
-        return Let::make(op->name, mutated_value, mutated_body);
-    }
-    Stmt visit(const LetStmt *op) override {
-
-        Expr mutated_value = mutate(op->value);
-
-        bool changed = op->value.type().is_float() != mutated_value.type().is_float();
-        if (changed) {
-            scope.push(op->name, mutated_value);
-        }
-
-        Stmt mutated_body = mutate(op->body);
-
-        if (changed) {
-            scope.pop(op->name);
-        }
-
-        return LetStmt::make(op->name, mutated_value, mutated_body);
-    }
-
-public:
-    CastVariablesToFloatAndOffset(const std::vector<std::string> &names_)
-        : names(names_) {
-    }
-
-    const std::vector<std::string> &names;
-    Scope<Expr> scope;
-};
-
-// This is the base class for a special mutator that, by default, turns an IR
-// tree into a tree of Stmts. Derived classes overload visit methods to filter
-// out specific expressions which are placed in Evaluate nodes within the new
-// tree.  This functionality is used by GLSL varying attributes to transform
-// tagged linear expressions into Store nodes for the vertex buffer. The
-// IRFilter allows these expressions to be filtered out while maintaining the
-// existing structure of Let variable scopes around them.
-//
-// TODO: could this be made to use the IRMutator pattern instead?
-class IRFilter : public IRVisitor {
-public:
-    virtual Stmt mutate(const Expr &e);
-    virtual Stmt mutate(const Stmt &s);
-
-protected:
-    using IRVisitor::visit;
-
-    Stmt stmt;
-
-    void visit(const IntImm *) override;
-    void visit(const FloatImm *) override;
-    void visit(const StringImm *) override;
-    void visit(const Cast *) override;
-    void visit(const Variable *) override;
-    void visit(const Add *) override;
-    void visit(const Sub *) override;
-    void visit(const Mul *) override;
-    void visit(const Div *) override;
-    void visit(const Mod *) override;
-    void visit(const Min *) override;
-    void visit(const Max *) override;
-    void visit(const EQ *) override;
-    void visit(const NE *) override;
-    void visit(const LT *) override;
-    void visit(const LE *) override;
-    void visit(const GT *) override;
-    void visit(const GE *) override;
-    void visit(const And *) override;
-    void visit(const Or *) override;
-    void visit(const Not *) override;
-    void visit(const Select *) override;
-    void visit(const Load *) override;
-    void visit(const Ramp *) override;
-    void visit(const Broadcast *) override;
-    void visit(const Call *) override;
-    void visit(const Let *) override;
-    void visit(const LetStmt *) override;
-    void visit(const AssertStmt *) override;
-    void visit(const ProducerConsumer *) override;
-    void visit(const For *) override;
-    void visit(const Store *) override;
-    void visit(const Provide *) override;
-    void visit(const Allocate *) override;
-    void visit(const Free *) override;
-    void visit(const Realize *) override;
-    void visit(const Block *) override;
-    void visit(const IfThenElse *) override;
-    void visit(const Evaluate *) override;
-};
-
-Stmt IRFilter::mutate(const Expr &e) {
-    if (e.defined()) {
-        e.accept(this);
-    } else {
-        stmt = Stmt();
-    }
-    return stmt;
-}
-
-Stmt IRFilter::mutate(const Stmt &s) {
-    if (s.defined()) {
-        s.accept(this);
-    } else {
-        stmt = Stmt();
-    }
-    return stmt;
-}
-
-template<typename T, typename A>
-void mutate_operator(IRFilter *mutator, const T *op, const A op_a, Stmt *stmt) {
-    Stmt a = mutator->mutate(op_a);
-    *stmt = a;
-}
-template<typename T, typename A, typename B>
-void mutate_operator(IRFilter *mutator, const T *op, const A op_a, const B op_b, Stmt *stmt) {
-    Stmt a = mutator->mutate(op_a);
-    Stmt b = mutator->mutate(op_b);
-    *stmt = make_block(a, b);
-}
-template<typename T, typename A, typename B, typename C>
-void mutate_operator(IRFilter *mutator, const T *op, const A op_a, const B op_b, const C op_c, Stmt *stmt) {
-    Stmt a = mutator->mutate(op_a);
-    Stmt b = mutator->mutate(op_b);
-    Stmt c = mutator->mutate(op_c);
-    *stmt = make_block(make_block(a, b), c);
-}
-
-void IRFilter::visit(const IntImm *op) {
-    stmt = Stmt();
-}
-void IRFilter::visit(const FloatImm *op) {
-    stmt = Stmt();
-}
-void IRFilter::visit(const StringImm *op) {
-    stmt = Stmt();
-}
-void IRFilter::visit(const Variable *op) {
-    stmt = Stmt();
-}
-
-void IRFilter::visit(const Cast *op) {
-    mutate_operator(this, op, op->value, &stmt);
-}
-
-void IRFilter::visit(const Add *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const Sub *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const Mul *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const Div *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const Mod *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const Min *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const Max *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const EQ *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const NE *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const LT *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const LE *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const GT *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const GE *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const And *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-void IRFilter::visit(const Or *op) {
-    mutate_operator(this, op, op->a, op->b, &stmt);
-}
-
-void IRFilter::visit(const Not *op) {
-    mutate_operator(this, op, op->a, &stmt);
-}
-
-void IRFilter::visit(const Select *op) {
-    mutate_operator(this, op, op->condition, op->true_value, op->false_value, &stmt);
-}
-
-void IRFilter::visit(const Load *op) {
-    mutate_operator(this, op, op->predicate, op->index, &stmt);
-}
-
-void IRFilter::visit(const Ramp *op) {
-    mutate_operator(this, op, op->base, op->stride, &stmt);
-}
-
-void IRFilter::visit(const Broadcast *op) {
-    mutate_operator(this, op, op->value, &stmt);
-}
-
-void IRFilter::visit(const Call *op) {
-    std::vector<Stmt> new_args(op->args.size());
-
-    // Mutate the args
-    for (size_t i = 0; i < op->args.size(); i++) {
-        Expr old_arg = op->args[i];
-        Stmt new_arg = mutate(old_arg);
-        new_args[i] = new_arg;
-    }
-
-    stmt = Stmt();
-    for (size_t i = 0; i < new_args.size(); ++i) {
-        if (new_args[i].defined()) {
-            stmt = make_block(new_args[i], stmt);
-        }
-    }
-}
-
-void IRFilter::visit(const Let *op) {
-    mutate_operator(this, op, op->value, op->body, &stmt);
-}
-
-void IRFilter::visit(const LetStmt *op) {
-    mutate_operator(this, op, op->value, op->body, &stmt);
-}
-
-void IRFilter::visit(const AssertStmt *op) {
-    mutate_operator(this, op, op->condition, op->message, &stmt);
-}
-
-void IRFilter::visit(const ProducerConsumer *op) {
-    mutate_operator(this, op, op->body, &stmt);
-}
-
-void IRFilter::visit(const For *op) {
-    mutate_operator(this, op, op->min, op->extent, op->body, &stmt);
-}
-
-void IRFilter::visit(const Store *op) {
-    mutate_operator(this, op, op->predicate, op->value, op->index, &stmt);
-}
-
-void IRFilter::visit(const Provide *op) {
-    stmt = Stmt();
-    for (size_t i = 0; i < op->args.size(); i++) {
-        Stmt new_arg = mutate(op->args[i]);
-        if (new_arg.defined()) {
-            stmt = make_block(new_arg, stmt);
-        }
-        Stmt new_value = mutate(op->values[i]);
-        if (new_value.defined()) {
-            stmt = make_block(new_value, stmt);
-        }
-    }
-}
-
-void IRFilter::visit(const Allocate *op) {
-    stmt = Stmt();
-    for (size_t i = 0; i < op->extents.size(); i++) {
-        Stmt new_extent = mutate(op->extents[i]);
-        if (new_extent.defined()) {
-            stmt = make_block(new_extent, stmt);
-        }
-    }
-
-    Stmt body = mutate(op->body);
-    if (body.defined()) {
-        stmt = make_block(body, stmt);
-    }
-
-    Stmt condition = mutate(op->condition);
-    if (condition.defined()) {
-        stmt = make_block(condition, stmt);
-    }
-}
-
-void IRFilter::visit(const Free *op) {
-}
-
-void IRFilter::visit(const Realize *op) {
-    stmt = Stmt();
-
-    // Mutate the bounds
-    for (size_t i = 0; i < op->bounds.size(); i++) {
-        Expr old_min = op->bounds[i].min;
-        Expr old_extent = op->bounds[i].extent;
-        Stmt new_min = mutate(old_min);
-        Stmt new_extent = mutate(old_extent);
-
-        if (new_min.defined()) {
-            stmt = make_block(new_min, stmt);
-        }
-        if (new_extent.defined()) {
-            stmt = make_block(new_extent, stmt);
-        }
-    }
-
-    Stmt body = mutate(op->body);
-    if (body.defined()) {
-        stmt = make_block(body, stmt);
-    }
-
-    Stmt condition = mutate(op->condition);
-    if (condition.defined()) {
-        stmt = make_block(condition, stmt);
-    }
-}
-
-void IRFilter::visit(const Block *op) {
-    mutate_operator(this, op, op->first, op->rest, &stmt);
-}
-
-void IRFilter::visit(const IfThenElse *op) {
-    mutate_operator(this, op, op->condition, op->then_case, op->else_case, &stmt);
-}
-
-void IRFilter::visit(const Evaluate *op) {
-    mutate_operator(this, op, op->value, &stmt);
-}
-
-// This visitor takes a IR tree containing a set of .glsl scheduled for-loops
-// and creates a matching set of serial for-loops to setup a vertex buffer on
-// the  host. The visitor  filters out glsl_varying intrinsics and transforms
-// them into Store nodes to evaluate the linear expressions they tag within the
-// scope of all of the Let definitions they fall within.
-// The statement returned by this operation should be executed on the host
-// before the call to halide_dev_run.
-class CreateVertexBufferOnHost : public IRFilter {
-public:
-    using IRFilter::visit;
-
-    void visit(const Call *op) override {
-
-        // Transform glsl_varying intrinsics into store operations to output the
-        // vertex coordinate values.
-        if (op->is_intrinsic(Call::glsl_varying)) {
-
-            // Construct an expression for the offset of the coordinate value in
-            // terms of the current integer loop variables and the varying
-            // attribute channel number
-            std::string attribute_name = op->args[0].as<StringImm>()->value;
-
-            Expr offset_expression = Variable::make(Int(32), "gpu.vertex_offset") +
-                                     attribute_order[attribute_name];
-
-            stmt = Store::make(vertex_buffer_name, op->args[1], offset_expression,
-                               Parameter(), const_true(op->args[1].type().lanes()), ModulusRemainder());
-        } else {
-            IRFilter::visit(op);
-        }
-    }
-
-    void visit(const Let *op) override {
-        stmt = nullptr;
-
-        Stmt mutated_value = mutate(op->value);
-        Stmt mutated_body = mutate(op->body);
-
-        // If an operation was filtered out of the body, also filter out the
-        // whole let expression so that the body may be evaluated completely. In
-        // the case that the let variable is not used in the mutated body, it
-        // will be removed by simplification.
-        if (mutated_body.defined()) {
-            stmt = LetStmt::make(op->name, op->value, mutated_body);
-        }
-
-        // If an operation with a side effect was filtered out of the value, the
-        // stmt'ified value is placed in a Block, so that the side effect will
-        // be included in filtered IR tree.
-        if (mutated_value.defined()) {
-            stmt = make_block(mutated_value, stmt);
-        }
-    }
-
-    void visit(const LetStmt *op) override {
-        stmt = Stmt();
-
-        Stmt mutated_value = mutate(op->value);
-        Stmt mutated_body = mutate(op->body);
-
-        if (mutated_body.defined()) {
-            stmt = LetStmt::make(op->name, op->value, mutated_body);
-        }
-
-        if (mutated_value.defined()) {
-            stmt = make_block(mutated_value, stmt);
-        }
-    }
-
-    void visit(const For *op) override {
-        if (CodeGen_GPU_Dev::is_gpu_var(op->name) && op->device_api == DeviceAPI::GLSL) {
-            // Create a for-loop of integers iterating over the coordinates in
-            // this dimension
-
-            std::string name = op->name + ".idx";
-            const std::vector<Expr> &dim = dims[op->name];
-
-            internal_assert(for_loops.size() <= 1);
-            for_loops.push_back(op);
-
-            Expr loop_variable = Variable::make(Int(32), name);
-            loop_variables.push_back(loop_variable);
-
-            // TODO: When support for piecewise linear expressions is added this
-            // expression must support more than two coordinates in each
-            // dimension.
-            Expr coord_expr = select(loop_variable == 0, dim[0], dim[1]);
-
-            // Visit the body of the for-loop
-            Stmt mutated_body = mutate(op->body);
-
-            // If this was the inner most for-loop of the .glsl scheduled pair,
-            // add a let definition for the vertex index and Store the spatial
-            // coordinates
-            const For *nested_for = op->body.as<For>();
-            if (!(nested_for && CodeGen_GPU_Dev::is_gpu_var(nested_for->name))) {
-
-                // Create a variable to store the offset in floats of this
-                // vertex
-                Expr gpu_varying_offset = Variable::make(Int(32), "gpu.vertex_offset");
-
-                // Add expressions for the x and y vertex coordinates.
-                Expr coord1 = cast<float>(Variable::make(Int(32), for_loops[0]->name));
-                Expr coord0 = cast<float>(Variable::make(Int(32), for_loops[1]->name));
-
-                // Transform the vertex coordinates to GPU device coordinates on
-                // [-1,1]
-                coord1 = (coord1 / for_loops[0]->extent) * 2.0f - 1.0f;
-                coord0 = (coord0 / for_loops[1]->extent) * 2.0f - 1.0f;
-
-                // Remove varying attribute intrinsics from the vertex setup IR
-                // tree.
-                mutated_body = remove_varying_attributes(mutated_body);
-
-                // The GPU will take texture coordinates at pixel centers during
-                // interpolation, we offset the Halide integer grid by 0.5 so that
-                // these coordinates line up on integer coordinate values.
-                std::vector<std::string> names = {for_loops[0]->name, for_loops[1]->name};
-                CastVariablesToFloatAndOffset cast_and_offset(names);
-                mutated_body = cast_and_offset.mutate(mutated_body);
-
-                // Store the coordinates into the vertex buffer in interleaved
-                // order
-                mutated_body = make_block(Store::make(vertex_buffer_name,
-                                                      coord1,
-                                                      gpu_varying_offset + 1,
-                                                      Parameter(), const_true(),
-                                                      ModulusRemainder()),
-                                          mutated_body);
-
-                mutated_body = make_block(Store::make(vertex_buffer_name,
-                                                      coord0,
-                                                      gpu_varying_offset + 0,
-                                                      Parameter(), const_true(),
-                                                      ModulusRemainder()),
-                                          mutated_body);
-
-                // TODO: The value 2 in this expression must be changed to reflect
-                // addition coordinate values in the fastest changing dimension when
-                // support for piecewise linear functions is added
-                Expr offset_expression = (loop_variables[0] * num_padded_attributes * 2) +
-                                         (loop_variables[1] * num_padded_attributes);
-                mutated_body = LetStmt::make("gpu.vertex_offset",
-                                             offset_expression, mutated_body);
-            }
-
-            // Add a let statement for the for-loop name variable
-            Stmt loop_var = LetStmt::make(op->name, coord_expr, mutated_body);
-
-            stmt = For::make(name, 0, (int)dim.size(), ForType::Serial, DeviceAPI::None, loop_var);
-
-        } else {
-            IRFilter::visit(op);
-        }
-    }
-
-    // The name of the previously allocated vertex buffer to store values
-    std::string vertex_buffer_name;
-
-    // Expressions for the spatial values of each coordinate in the GPU scheduled
-    // loop dimensions.
-    typedef std::map<std::string, std::vector<Expr>> DimsType;
-    DimsType dims;
-
-    // The channel of each varying attribute in the interleaved vertex buffer
-    std::map<std::string, int> attribute_order;
-
-    // The number of attributes padded up to the next multiple of four. This is
-    // the stride from one vertex to the next in the buffer
-    int num_padded_attributes;
-
-    // Independent variable names in the linear expressions
-    std::vector<const For *> for_loops;
-
-    // Loop variables iterated across per GPU scheduled loop dimension to
-    // construct the vertex buffer
-    std::vector<Expr> loop_variables;
-};
-
-// These two methods provide a workaround to maintain unused let statements in
-// the IR tree util calls are added that used them in codegen.
-
-// TODO: We want to define a set of variables during lowering, and then use
-// them during GLSL host codegen to pass values to the
-// halide_dev_run function. It turns out that these variables will
-// be simplified away since the call to the function does not appear
-// in the IR. To avoid this we wrap the declaration in a
-// return_second intrinsic as well as add a return_second intrinsic
-// to consume the value.
-// This prevents simplification passes that occur before codegen
-// from removing the variables or substituting in their constant
-// values.
-
-Expr dont_simplify(const Expr &v_) {
-    return Internal::Call::make(v_.type(),
-                                Internal::Call::return_second,
-                                {0, v_},
-                                Internal::Call::Intrinsic);
-}
-
-Stmt used_in_codegen(Type type_, const std::string &v_) {
-    return Evaluate::make(Internal::Call::make(Int(32),
-                                               Internal::Call::return_second,
-                                               {Variable::make(type_, v_), 0},
-                                               Internal::Call::Intrinsic));
-}
-
-// This mutator inserts a set of serial for-loops to create the vertex buffer
-// on the host using CreateVertexBufferOnHost above.
-class CreateVertexBufferHostLoops : public IRMutator {
-public:
-    using IRMutator::visit;
-
-    Stmt visit(const For *op) override {
-        if (CodeGen_GPU_Dev::is_gpu_var(op->name) && op->device_api == DeviceAPI::GLSL) {
-
-            const For *loop1 = op;
-            const For *loop0 = loop1->body.as<For>();
-
-            internal_assert(loop1->body.as<For>()) << "Did not find pair of nested For loops";
-
-            // Construct a mesh of expressions to instantiate during runtime
-            std::map<std::string, Expr> varyings;
-
-            FindVaryingAttributeTags tag_finder(varyings);
-            op->accept(&tag_finder);
-
-            // Establish and order for the attributes in each vertex
-            std::map<std::string, int> attribute_order;
-
-            // Add the attribute names to the mesh in the order that they appear in
-            // each vertex
-            attribute_order["__vertex_x"] = 0;
-            attribute_order["__vertex_y"] = 1;
-
-            int idx = 2;
-            for (const std::pair<const std::string, Expr> &v : varyings) {
-                attribute_order[v.first] = idx++;
-            }
-
-            // Construct a list of expressions giving to coordinate locations along
-            // each dimension, starting with the minimum and maximum coordinates
-
-            attribute_order[loop0->name] = 0;
-            attribute_order[loop1->name] = 1;
-
-            Expr loop0_max = Add::make(loop0->min, loop0->extent);
-            Expr loop1_max = Add::make(loop1->min, loop1->extent);
-
-            std::vector<std::vector<Expr>> coords(2);
-
-            coords[0].push_back(loop0->min);
-            coords[0].push_back(loop0_max);
-
-            coords[1].push_back(loop1->min);
-            coords[1].push_back(loop1_max);
-
-            // Count the two spatial x and y coordinates plus the number of
-            // varying attribute expressions found
-            int num_attributes = varyings.size() + 2;
-
-            // Pad the number of attributes up to a multiple of four
-            int num_padded_attributes = (num_attributes + 0x3) & ~0x3;
-            int vertex_buffer_size = num_padded_attributes * coords[0].size() * coords[1].size();
-
-            // Filter out varying attribute expressions from the glsl scheduled
-            // loops. The expressions are filtered out in situ, among the
-            // variables in scope
-            CreateVertexBufferOnHost vs;
-            vs.vertex_buffer_name = "glsl.vertex_buffer";
-            vs.num_padded_attributes = num_padded_attributes;
-            vs.dims[loop0->name] = coords[0];
-            vs.dims[loop1->name] = coords[1];
-            vs.attribute_order = attribute_order;
-
-            Stmt vertex_setup = vs.mutate(loop1);
-
-            // Remove varying attribute intrinsics from the vertex setup IR
-            // tree. These may occur if an expression such as a Let-value was
-            // filtered out without being mutated.
-            vertex_setup = remove_varying_attributes(vertex_setup);
-
-            // Simplify the new host code.  Workaround for #588
-            vertex_setup = simplify(vertex_setup);
-            vertex_setup = simplify(vertex_setup);
-            vertex_setup = simplify(vertex_setup);
-            vertex_setup = simplify(vertex_setup);
-
-            // Replace varying attribute intriniscs in the gpu scheduled loops
-            // with variables with ".varying" tagged names
-            Stmt loop_stmt = replace_varying_attributes(op);
-
-            // Simplify
-            loop_stmt = simplify(loop_stmt, true);
-
-            // It is possible that linear expressions we tagged in higher-level
-            // intrinsics were removed by simplification if they were only used in
-            // subsequent tagged linear expressions. Run a pass to check for
-            // these and remove them from the varying attribute list
-            prune_varying_attributes(loop_stmt, varyings);
-
-            // At this point the varying attribute expressions have been removed from
-            // loop_stmt- it only contains variables tagged with .varying
-
-            // The GPU will only interpolate floating point values so the varying
-            // attribute variables must be converted to floating point. If the
-            // original varying expression was integer, casts are inserts to
-            // snap the value back to the integer grid.
-            loop_stmt = CastVaryingVariables().mutate(loop_stmt);
-
-            // clang-format off
-            // Insert two new for-loops for vertex buffer generation on the host
-            // before the two GPU scheduled for-loops
-            return LetStmt::make("glsl.num_coords_dim0", dont_simplify((int)(coords[0].size())),
-                   LetStmt::make("glsl.num_coords_dim1", dont_simplify((int)(coords[1].size())),
-                   LetStmt::make("glsl.num_padded_attributes", dont_simplify(num_padded_attributes),
-                   Allocate::make(vs.vertex_buffer_name, Float(32), MemoryType::Auto, {vertex_buffer_size}, const_true(),
-                   Block::make(vertex_setup,
-                   Block::make(loop_stmt,
-                   Block::make(used_in_codegen(Int(32), "glsl.num_coords_dim0"),
-                   Block::make(used_in_codegen(Int(32), "glsl.num_coords_dim1"),
-                   Block::make(used_in_codegen(Int(32), "glsl.num_padded_attributes"),
-                   Free::make(vs.vertex_buffer_name))))))))));
-            // clang-format on
-        } else {
-            return IRMutator::visit(op);
-        }
-    }
-};
-
-}  // namespace
-
-Stmt setup_gpu_vertex_buffer(const Stmt &s) {
-    CreateVertexBufferHostLoops vb;
-    return vb.mutate(s);
-}
-
-}  // namespace Internal
-}  // namespace Halide
diff --git a/src/VaryingAttributes.h b/src/VaryingAttributes.h
deleted file mode 100644
index 55475471e1aa..000000000000
--- a/src/VaryingAttributes.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef __HALIDE_VARYING_ATTRIBUTES__H
-#define __HALIDE_VARYING_ATTRIBUTES__H
-
-/** \file
- * This file contains functions that detect expressions in a GLSL scheduled
- * function that may be evaluated per vertex and interpolated across the domain
- * instead of being evaluated at each pixel location across the image.
- */
-
-#include "Expr.h"
-
-namespace Halide {
-namespace Internal {
-
-/** find_linear_expressions(Stmt s) identifies expressions that may be moved
- * out of the generated fragment shader into a varying attribute. These
- * expressions are tagged by wrapping them in a glsl_varying intrinsic
- */
-Stmt find_linear_expressions(const Stmt &s);
-
-/** Compute a set of 2D mesh coordinates based on the behavior of varying
- * attribute expressions contained within a GLSL scheduled for loop. This
- * method is called during lowering to extract varying attribute
- * expressions and generate code to evalue them at each mesh vertex
- * location. The operation is performed on the host before the draw call
- * to invoke the shader
- */
-Stmt setup_gpu_vertex_buffer(const Stmt &s);
-
-}  // namespace Internal
-}  // namespace Halide
-
-#endif
diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt
index 1385e04ae13a..25214f37e68f 100644
--- a/src/runtime/CMakeLists.txt
+++ b/src/runtime/CMakeLists.txt
@@ -47,7 +47,6 @@ set(RUNTIME_CPP
     msan
     msan_stubs
     opencl
-    opengl
     opengl_egl_context
     opengl_glx_context
     openglcompute
@@ -132,7 +131,6 @@ set(RUNTIME_HEADER_FILES
     HalideRuntimeHexagonHost.h
     HalideRuntimeMetal.h
     HalideRuntimeOpenCL.h
-    HalideRuntimeOpenGL.h
     HalideRuntimeOpenGLCompute.h
     HalideRuntimeQurt.h
     )
diff --git a/src/runtime/HalideRuntime.h b/src/runtime/HalideRuntime.h
index e4b7dc24fd54..3a88b91a0611 100644
--- a/src/runtime/HalideRuntime.h
+++ b/src/runtime/HalideRuntime.h
@@ -1283,7 +1283,6 @@ typedef enum halide_target_feature_t {
     halide_target_feature_cl_doubles,   ///< Enable double support on OpenCL targets
     halide_target_feature_cl_atomic64,  ///< Enable 64-bit atomics operations on OpenCL targets
 
-    halide_target_feature_opengl,         ///< Enable the OpenGL runtime. NOTE: this feature is deprecated and will be removed in Halide 12.
     halide_target_feature_openglcompute,  ///< Enable OpenGL Compute runtime.
 
     halide_target_feature_user_context,  ///< Generated code takes a user_context pointer as first argument
diff --git a/src/runtime/HalideRuntimeOpenGL.h b/src/runtime/HalideRuntimeOpenGL.h
deleted file mode 100644
index 14bb8b57f945..000000000000
--- a/src/runtime/HalideRuntimeOpenGL.h
+++ /dev/null
@@ -1,105 +0,0 @@
-#ifndef HALIDE_HALIDERUNTIMEOPENGL_H
-#define HALIDE_HALIDERUNTIMEOPENGL_H
-
-// Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one
-#ifndef HALIDE_HALIDERUNTIME_H
-
-#include "HalideRuntime.h"
-
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/** \file
- *  Routines specific to the Halide OpenGL runtime.
- */
-
-#define HALIDE_RUNTIME_OPENGL
-
-HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12")
-extern const struct halide_device_interface_t *halide_opengl_device_interface();
-
-/** These are forward declared here to allow clients to override the
- *  Halide Glsl runtime. Do not call them. */
-// @{
-HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12")
-extern int halide_opengl_initialize_kernels(void *user_context, void **state_ptr,
-                                            const char *src, int size);
-
-HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12")
-extern int halide_opengl_run(void *user_context,
-                             void *state_ptr,
-                             const char *entry_name,
-                             int blocksX, int blocksY, int blocksZ,
-                             int threadsX, int threadsY, int threadsZ,
-                             int shared_mem_bytes,
-                             size_t arg_sizes[],
-                             void *args[],
-                             int8_t is_buffer[],
-                             int num_attributes,
-                             float *vertex_buffer,
-                             int num_coords_dim0,
-                             int num_coords_dim1);
-// @}
-
-/** Set the underlying OpenGL texture for a buffer. The texture must
- * have an extent large enough to cover that specified by the
- * halide_buffer_t extent fields. The dev field of the halide_buffer_t
- * must be NULL when this routine is called. This call can fail due to
- * being passed an invalid texture. The device and host dirty bits are
- * left unmodified. */
-HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12")
-extern int halide_opengl_wrap_texture(void *user_context, struct halide_buffer_t *buf, uint64_t texture_id);
-
-/** Set the underlying OpenGL texture for a buffer to refer to the
- * current render target (e.g., the frame buffer or an FBO). The
- * render target must have an extent large enough to cover that
- * specified by the halide_buffer_t extent fields. The dev field of
- * the halide_buffer_t must be NULL when this routine is called. This
- * call can fail due to running out of memory. The device and host
- * dirty bits are left unmodified. */
-HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12")
-extern int halide_opengl_wrap_render_target(void *user_context, struct halide_buffer_t *buf);
-
-/** Disconnect this halide_buffer_t from the texture it was previously
- * wrapped around. Should only be called for a halide_buffer_t that
- * halide_opengl_wrap_texture was previously called on. Frees any
- * storage associated with the binding of the halide_buffer_t and the
- * device pointer, but does not free the texture.  The dev field of
- * the halide_buffer_t will be NULL on return.
- */
-HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12")
-extern int halide_opengl_detach_texture(void *user_context, struct halide_buffer_t *buf);
-
-/** Return the underlying texture for a halide_buffer_t. This buffer
- *  must be valid on an OpenGL device, or not have any associated
- *  device memory. If there is no device memory (dev field is NULL),
- *  or if the buffer was wrapped via
- *  halide_opengl_wrap_render_target(), this returns 0.
- */
-HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12")
-extern uintptr_t halide_opengl_get_texture(void *user_context, struct halide_buffer_t *buf);
-
-/** Forget all state associated with the previous OpenGL context.  This is
- * similar to halide_opengl_release, except that we assume that all OpenGL
- * resources have already been reclaimed by the OS. */
-HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12")
-extern void halide_opengl_context_lost(void *user_context);
-
-/** This functions MUST be provided by the host environment to retrieve pointers
- *  to OpenGL API functions. */
-HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12")
-void *halide_opengl_get_proc_address(void *user_context, const char *name);
-
-/** This functions MUST be provided by the host environment to create an OpenGL
- *  context for use by the OpenGL backend. */
-HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12")
-int halide_opengl_create_context(void *user_context);
-
-#ifdef __cplusplus
-}  // End extern "C"
-#endif
-
-#endif  // HALIDE_HALIDERUNTIMEOPENGL_H
diff --git a/src/runtime/opengl.cpp b/src/runtime/opengl.cpp
deleted file mode 100644
index 73964bfb64ee..000000000000
--- a/src/runtime/opengl.cpp
+++ /dev/null
@@ -1,2101 +0,0 @@
-// Ignore deprecation warnings inside our own runtime
-#define HALIDE_ALLOW_DEPRECATED 1
-
-#include "HalideRuntimeOpenGL.h"
-#include "device_interface.h"
-#include "mini_opengl.h"
-#include "printer.h"
-
-// This constant is used to indicate that the application will take
-// responsibility for binding the output render target before calling the
-// Halide function.
-#define HALIDE_OPENGL_RENDER_TARGET ((uint64_t)-1)
-
-// Implementation note: all function that directly or indirectly access the
-// runtime state in halide_opengl_state must be declared as WEAK, otherwise
-// the behavior at runtime is undefined.
-
-// List of all OpenGL functions used by the runtime. The list is used to
-// declare and initialize the dispatch table in OpenGLState below.
-#define USED_GL_FUNCTIONS                                                \
-    GLFUNC(PFNGLDELETETEXTURESPROC, DeleteTextures);                     \
-    GLFUNC(PFNGLGENTEXTURESPROC, GenTextures);                           \
-    GLFUNC(PFNGLBINDTEXTUREPROC, BindTexture);                           \
-    GLFUNC(PFNGLGETERRORPROC, GetError);                                 \
-    GLFUNC(PFNGLVIEWPORTPROC, Viewport);                                 \
-    GLFUNC(PFNGLGENBUFFERSPROC, GenBuffers);                             \
-    GLFUNC(PFNGLDELETEBUFFERSPROC, DeleteBuffers);                       \
-    GLFUNC(PFNGLBINDBUFFERPROC, BindBuffer);                             \
-    GLFUNC(PFNGLBUFFERDATAPROC, BufferData);                             \
-    GLFUNC(PFNGLTEXPARAMETERIPROC, TexParameteri);                       \
-    GLFUNC(PFNGLTEXIMAGE2DPROC, TexImage2D);                             \
-    GLFUNC(PFNGLTEXSUBIMAGE2DPROC, TexSubImage2D);                       \
-    GLFUNC(PFNGLDISABLEPROC, Disable);                                   \
-    GLFUNC(PFNGLDISABLEPROC, Enable);                                    \
-    GLFUNC(PFNGLCREATESHADERPROC, CreateShader);                         \
-    GLFUNC(PFNGLACTIVETEXTUREPROC, ActiveTexture);                       \
-    GLFUNC(PFNGLSHADERSOURCEPROC, ShaderSource);                         \
-    GLFUNC(PFNGLCOMPILESHADERPROC, CompileShader);                       \
-    GLFUNC(PFNGLGETSHADERIVPROC, GetShaderiv);                           \
-    GLFUNC(PFNGLGETSHADERINFOLOGPROC, GetShaderInfoLog);                 \
-    GLFUNC(PFNGLDELETESHADERPROC, DeleteShader);                         \
-    GLFUNC(PFNGLCREATEPROGRAMPROC, CreateProgram);                       \
-    GLFUNC(PFNGLATTACHSHADERPROC, AttachShader);                         \
-    GLFUNC(PFNGLLINKPROGRAMPROC, LinkProgram);                           \
-    GLFUNC(PFNGLGETPROGRAMIVPROC, GetProgramiv);                         \
-    GLFUNC(PFNGLGETPROGRAMINFOLOGPROC, GetProgramInfoLog);               \
-    GLFUNC(PFNGLUSEPROGRAMPROC, UseProgram);                             \
-    GLFUNC(PFNGLDELETEPROGRAMPROC, DeleteProgram);                       \
-    GLFUNC(PFNGLGETUNIFORMLOCATIONPROC, GetUniformLocation);             \
-    GLFUNC(PFNGLUNIFORM1IVPROC, Uniform1iv);                             \
-    GLFUNC(PFNGLUNIFORM2IVPROC, Uniform2iv);                             \
-    GLFUNC(PFNGLUNIFORM2IVPROC, Uniform4iv);                             \
-    GLFUNC(PFNGLUNIFORM1FVPROC, Uniform1fv);                             \
-    GLFUNC(PFNGLUNIFORM1FVPROC, Uniform4fv);                             \
-    GLFUNC(PFNGLGENFRAMEBUFFERSPROC, GenFramebuffers);                   \
-    GLFUNC(PFNGLDELETEFRAMEBUFFERSPROC, DeleteFramebuffers);             \
-    GLFUNC(PFNGLCHECKFRAMEBUFFERSTATUSPROC, CheckFramebufferStatus);     \
-    GLFUNC(PFNGLBINDFRAMEBUFFERPROC, BindFramebuffer);                   \
-    GLFUNC(PFNGLFRAMEBUFFERTEXTURE2DPROC, FramebufferTexture2D);         \
-    GLFUNC(PFNGLGETATTRIBLOCATIONPROC, GetAttribLocation);               \
-    GLFUNC(PFNGLVERTEXATTRIBPOINTERPROC, VertexAttribPointer);           \
-    GLFUNC(PFNGLDRAWELEMENTSPROC, DrawElements);                         \
-    GLFUNC(PFNGLENABLEVERTEXATTRIBARRAYPROC, EnableVertexAttribArray);   \
-    GLFUNC(PFNGLDISABLEVERTEXATTRIBARRAYPROC, DisableVertexAttribArray); \
-    GLFUNC(PFNGLGETVERTEXATTRIBIVPROC, GetVertexAttribiv);               \
-    GLFUNC(PFNGLPIXELSTOREIPROC, PixelStorei);                           \
-    GLFUNC(PFNGLREADPIXELS, ReadPixels);                                 \
-    GLFUNC(PFNGLGETSTRINGPROC, GetString);                               \
-    GLFUNC(PFNGLGETINTEGERV, GetIntegerv);                               \
-    GLFUNC(PFNGLGETBOOLEANV, GetBooleanv);                               \
-    GLFUNC(PFNGLFINISHPROC, Finish);
-
-// List of all OpenGL functions used by the runtime, which may not
-// exist due to an older or less capable version of GL. In using any
-// of these functions, code must test if they are nullptr.
-#define OPTIONAL_GL_FUNCTIONS                            \
-    GLFUNC(PFNGLGENVERTEXARRAYS, GenVertexArrays);       \
-    GLFUNC(PFNGLBINDVERTEXARRAY, BindVertexArray);       \
-    GLFUNC(PFNGLDELETEVERTEXARRAYS, DeleteVertexArrays); \
-    GLFUNC(PFNDRAWBUFFERS, DrawBuffers)
-
-// ---------- Types ----------
-
-using namespace Halide::Runtime::Internal;
-
-namespace Halide {
-namespace Runtime {
-namespace Internal {
-namespace OpenGL {
-
-extern WEAK halide_device_interface_t opengl_device_interface;
-
-WEAK const char *gl_error_name(int32_t err) {
-    const char *result;
-    switch (err) {
-    case 0x500:
-        result = "GL_INVALID_ENUM";
-        break;
-    case 0x501:
-        result = "GL_INVALID_VALUE";
-        break;
-    case 0x502:
-        result = "GL_INVALID_OPERATION";
-        break;
-    case 0x503:
-        result = "GL_STACK_OVERFLOW";
-        break;
-    case 0x504:
-        result = "GL_STACK_UNDERFLOW";
-        break;
-    case 0x505:
-        result = "GL_OUT_OF_MEMORY";
-        break;
-    case 0x506:
-        result = "GL_INVALID_FRAMEBUFFER_OPERATION";
-        break;
-    case 0x507:
-        result = "GL_CONTEXT_LOST";
-        break;
-    case 0x8031:
-        result = "GL_TABLE_TOO_LARGE";
-        break;
-    default:
-        result = "<unknown GL error>";
-        break;
-    }
-    return result;
-}
-
-struct HalideMalloc {
-    ALWAYS_INLINE HalideMalloc(void *user_context, size_t size)
-        : user_context(user_context), ptr(halide_malloc(user_context, size)) {
-    }
-    ALWAYS_INLINE ~HalideMalloc() {
-        halide_free(user_context, ptr);
-    }
-    void *const user_context;
-    void *const ptr;
-};
-
-enum OpenGLProfile {
-    OpenGL,
-    OpenGLES
-};
-
-struct Argument {
-    // The kind of data stored in an argument
-    enum Kind {
-        Invalid,
-        Uniform,  // uniform variable
-        Varying,  // varying attribute
-        Inbuf,    // input texture
-        Outbuf    // output texture
-    };
-
-    // The elementary data type of the argument
-    enum Type {
-        Void,
-        Bool,
-        Float,
-        Int8,
-        Int16,
-        Int32,
-        UInt8,
-        UInt16,
-        UInt32
-    };
-
-    char *name;
-    Kind kind;
-    Type type;
-    Argument *next;
-};
-
-struct KernelInfo {
-    char *name;
-    char *source;
-    Argument *arguments;
-    GLuint shader_id;
-    GLuint program_id;
-};
-
-struct ModuleState {
-    KernelInfo *kernel;
-    ModuleState *next;
-};
-
-// All persistent state maintained by the runtime.
-struct GlobalState {
-    void init();
-    bool CheckAndReportError(void *user_context, const char *location);
-
-    bool initialized;
-
-    // Information about the OpenGL platform we're running on.
-    OpenGLProfile profile;
-    int major_version, minor_version;
-    bool have_vertex_array_objects;
-    bool have_texture_rg;
-    bool have_texture_float;
-    bool have_texture_rgb8_rgba8;
-
-    // Various objects shared by all filter kernels
-    GLuint framebuffer_id;
-    GLuint vertex_array_object;
-    GLuint vertex_buffer;
-    GLuint element_buffer;
-
-    // Declare pointers used OpenGL functions
-#define GLFUNC(PTYPE, VAR) PTYPE VAR
-    USED_GL_FUNCTIONS;
-    OPTIONAL_GL_FUNCTIONS;
-#undef GLFUNC
-};
-
-WEAK bool GlobalState::CheckAndReportError(void *user_context, const char *location) {
-    GLenum err = GetError();
-    if (err != GL_NO_ERROR) {
-        error(user_context) << "OpenGL error " << gl_error_name(err) << "(" << (int)err << ")"
-                            << " at " << location << ".\n";
-        return true;
-    }
-    return false;
-}
-
-WEAK GlobalState global_state;
-
-// Saves & restores OpenGL state
-class GLStateSaver {
-public:
-    ALWAYS_INLINE GLStateSaver() {
-        save();
-    }
-    ALWAYS_INLINE ~GLStateSaver() {
-        restore();
-    }
-
-private:
-    // The state variables
-    GLint active_texture;
-    GLint array_buffer_binding;
-    GLint element_array_buffer_binding;
-    GLint framebuffer_binding;
-    GLint program;
-    GLint vertex_array_binding;
-    GLint viewport[4];
-    GLboolean cull_face;
-    GLboolean depth_test;
-    int max_combined_texture_image_units;
-    GLint *texture_2d_binding;
-    int max_vertex_attribs;
-    GLint *vertex_attrib_array_enabled;
-
-    // Define these out-of-line as WEAK, to avoid LLVM error "MachO doesn't support COMDATs"
-    void save();
-    void restore();
-};
-
-WEAK void GLStateSaver::save() {
-    global_state.GetIntegerv(GL_ACTIVE_TEXTURE, &active_texture);
-    global_state.GetIntegerv(GL_ARRAY_BUFFER_BINDING, &array_buffer_binding);
-    global_state.GetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING, &element_array_buffer_binding);
-    global_state.GetIntegerv(GL_FRAMEBUFFER_BINDING, &framebuffer_binding);
-    global_state.GetIntegerv(GL_CURRENT_PROGRAM, &program);
-    global_state.GetBooleanv(GL_CULL_FACE, &cull_face);
-    global_state.GetBooleanv(GL_DEPTH_TEST, &depth_test);
-    global_state.GetIntegerv(GL_VIEWPORT, viewport);
-
-    global_state.GetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &max_combined_texture_image_units);
-    texture_2d_binding = (GLint *)malloc(max_combined_texture_image_units * sizeof(GLint));
-    for (int i = 0; i < max_combined_texture_image_units; i++) {
-        global_state.ActiveTexture(GL_TEXTURE0 + i);
-        global_state.GetIntegerv(GL_TEXTURE_BINDING_2D, &texture_2d_binding[i]);
-    }
-
-    global_state.GetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attribs);
-    vertex_attrib_array_enabled = (GLint *)malloc(max_vertex_attribs * sizeof(GLint));
-    for (int i = 0; i < max_vertex_attribs; i++) {
-        global_state.GetVertexAttribiv(i, GL_VERTEX_ATTRIB_ARRAY_ENABLED, &vertex_attrib_array_enabled[i]);
-    }
-
-    if (global_state.have_vertex_array_objects) {
-        global_state.GetIntegerv(GL_VERTEX_ARRAY_BINDING, &vertex_array_binding);
-    }
-
-#ifdef DEBUG_RUNTIME
-    debug(nullptr) << "Saved OpenGL state\n";
-#endif
-}
-
-WEAK void GLStateSaver::restore() {
-#ifdef DEBUG_RUNTIME
-    debug(nullptr) << "Restoring OpenGL state\n";
-#endif
-
-    for (int i = 0; i < max_combined_texture_image_units; i++) {
-        global_state.ActiveTexture(GL_TEXTURE0 + i);
-        global_state.BindTexture(GL_TEXTURE_2D, texture_2d_binding[i]);
-    }
-    free(texture_2d_binding);
-
-    if (global_state.have_vertex_array_objects) {
-        global_state.BindVertexArray(vertex_array_binding);
-    }
-
-    for (int i = 0; i < max_vertex_attribs; i++) {
-        if (vertex_attrib_array_enabled[i]) {
-            global_state.EnableVertexAttribArray(i);
-        } else {
-            global_state.DisableVertexAttribArray(i);
-        }
-    }
-    free(vertex_attrib_array_enabled);
-
-    global_state.ActiveTexture(active_texture);
-    global_state.BindFramebuffer(GL_FRAMEBUFFER, framebuffer_binding);
-    global_state.BindBuffer(GL_ARRAY_BUFFER, array_buffer_binding);
-    global_state.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, element_array_buffer_binding);
-    global_state.UseProgram(program);
-    global_state.Viewport(viewport[0], viewport[1], viewport[2], viewport[3]);
-    (cull_face ? global_state.Enable : global_state.Disable)(GL_CULL_FACE);
-    (depth_test ? global_state.Enable : global_state.Disable)(GL_DEPTH_TEST);
-}
-
-// A list of module-specific state. Each module corresponds to a single Halide filter
-WEAK ModuleState *state_list;
-
-WEAK const char *kernel_marker = "/// KERNEL ";
-WEAK const char *input_marker = "/// IN_BUFFER ";
-WEAK const char *output_marker = "/// OUT_BUFFER ";
-WEAK const char *uniform_marker = "/// UNIFORM ";
-WEAK const char *varying_marker = "/// VARYING ";
-
-// ---------- Helper functions ----------
-
-WEAK char *strndup(const char *s, size_t n) {
-    char *p = (char *)malloc(n + 1);
-    memcpy(p, s, n);
-    p[n] = '\0';
-    return p;
-}
-
-// Strip whitespace from the right side of
-// a string
-WEAK char *strstrip(char *str, size_t n) {
-    char *pos = str;
-    while (pos != str + n && *pos != '\0' && *pos != '\n' && *pos != ' ') {
-        pos++;
-    }
-    *pos = '\0';
-    return str;
-}
-
-WEAK void debug_buffer(void *user_context, halide_buffer_t *buf) {
-    debug(user_context) << *buf << "\n";
-}
-
-WEAK GLuint make_shader(void *user_context, GLenum type,
-                        const char *source, GLint *length) {
-#ifdef DEBUG_RUNTIME
-    {
-        debug(user_context) << ((type == GL_VERTEX_SHADER) ? "GL_VERTEX_SHADER" : "GL_FRAGMENT_SHADER")
-                            << " SOURCE:\n";
-        // debug() will go thru Printer<> which has a fixed, non-growing size.
-        // Just pass the source directly to halide_print instead, so it won't get clipped.
-        halide_print(user_context, source);
-    }
-#endif
-
-    GLuint shader = global_state.CreateShader(type);
-    if (global_state.CheckAndReportError(user_context, "make_shader(1)")) {
-        return 1;
-    }
-    if (*source == '\0') {
-        debug(user_context) << "Halide GLSL: passed shader source is empty, using default.\n";
-        const char *default_shader = "varying vec2 pixcoord;\n void main() { }";
-        global_state.ShaderSource(shader, 1, (const GLchar **)&default_shader, nullptr);
-    } else {
-        global_state.ShaderSource(shader, 1, (const GLchar **)&source, length);
-    }
-    if (global_state.CheckAndReportError(user_context, "make_shader(2)")) {
-        return 1;
-    }
-    global_state.CompileShader(shader);
-    if (global_state.CheckAndReportError(user_context, "make_shader(3)")) {
-        return 1;
-    }
-
-    GLint shader_ok = 0;
-    global_state.GetShaderiv(shader, GL_COMPILE_STATUS, &shader_ok);
-    if (!shader_ok) {
-        print(user_context) << "Could not compile shader:\n";
-        GLint log_len;
-        global_state.GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_len);
-        HalideMalloc log_tmp(user_context, log_len);
-        if (log_tmp.ptr) {
-            char *log = (char *)log_tmp.ptr;
-            global_state.GetShaderInfoLog(shader, log_len, nullptr, log);
-            print(user_context) << log << "\n";
-        }
-        global_state.DeleteShader(shader);
-        return 0;
-    }
-    return shader;
-}
-
-// Check whether string starts with a given prefix.
-// Returns pointer to character after matched prefix if successful or nullptr.
-WEAK const char *match_prefix(const char *s, const char *prefix) {
-    if (0 == strncmp(s, prefix, strlen(prefix))) {
-        return s + strlen(prefix);
-    }
-    return nullptr;
-}
-
-// Parse declaration of the form "type name" and construct matching Argument.
-WEAK Argument *parse_argument(void *user_context, const char *src,
-                              const char *end) {
-    const char *name;
-    Argument::Type type = Argument::Void;
-    if ((name = match_prefix(src, "float "))) {
-        type = Argument::Float;
-    } else if ((name = match_prefix(src, "bool "))) {
-        type = Argument::Bool;
-    } else if ((name = match_prefix(src, "int8_t "))) {
-        type = Argument::Int8;
-    } else if ((name = match_prefix(src, "int16_t "))) {
-        type = Argument::Int16;
-    } else if ((name = match_prefix(src, "int32_t "))) {
-        type = Argument::Int32;
-    } else if ((name = match_prefix(src, "uint8_t "))) {
-        type = Argument::UInt8;
-    } else if ((name = match_prefix(src, "uint16_t "))) {
-        type = Argument::UInt16;
-    } else if ((name = match_prefix(src, "uint32_t "))) {
-        type = Argument::UInt32;
-    }
-    if (type == Argument::Void) {
-        error(user_context) << "Internal error: argument type not supported";
-        return nullptr;
-    }
-
-    Argument *arg = (Argument *)malloc(sizeof(Argument));
-    arg->name = strndup(name, end - name);
-    arg->type = type;
-    arg->kind = Argument::Invalid;
-    arg->next = nullptr;
-    return arg;
-}
-
-// Create KernelInfo for a piece of GLSL code
-WEAK KernelInfo *create_kernel(void *user_context, const char *src, int size) {
-    KernelInfo *kernel = (KernelInfo *)malloc(sizeof(KernelInfo));
-
-    kernel->source = strndup(src, size);
-    kernel->arguments = nullptr;
-    kernel->program_id = 0;
-
-    debug(user_context) << "Compiling GLSL kernel (size = " << size << "):\n";
-
-    // Parse initial comment block
-    const char *line = kernel->source;
-    while (*line) {
-        const char *next_line = strchr(line, '\n') + 1;
-        if (!next_line) {
-            next_line = line + size;
-        }
-
-        const char *args;
-        if ((args = match_prefix(line, kernel_marker))) {
-            // set name
-            kernel->name = strstrip(strndup(args, next_line - args), next_line - args);
-        } else if ((args = match_prefix(line, uniform_marker))) {
-            if (Argument *arg =
-                    parse_argument(user_context, args, next_line - 1)) {
-                arg->kind = Argument::Uniform;
-                arg->next = kernel->arguments;
-                kernel->arguments = arg;
-            } else {
-                halide_error(user_context, "Invalid VAR marker");
-                goto error;
-            }
-        } else if ((args = match_prefix(line, varying_marker))) {
-            if (Argument *arg =
-                    parse_argument(user_context, args, next_line - 1)) {
-                arg->kind = Argument::Varying;
-                arg->next = kernel->arguments;
-                kernel->arguments = arg;
-            } else {
-                halide_error(user_context, "Invalid VARYING marker");
-                goto error;
-            }
-        } else if ((args = match_prefix(line, input_marker))) {
-            if (Argument *arg = parse_argument(user_context, args, next_line - 1)) {
-                arg->kind = Argument::Inbuf;
-                arg->next = kernel->arguments;
-                kernel->arguments = arg;
-            } else {
-                error(user_context) << "Invalid IN_BUFFER marker";
-                goto error;
-            }
-        } else if ((args = match_prefix(line, output_marker))) {
-            if (Argument *arg = parse_argument(user_context, args, next_line - 1)) {
-                arg->kind = Argument::Outbuf;
-                arg->next = kernel->arguments;
-                kernel->arguments = arg;
-            } else {
-                error(user_context) << "Invalid OUT_BUFFER marker";
-                goto error;
-            }
-        } else {
-            // Stop parsing if we encounter something we don't recognize
-            break;
-        }
-        line = next_line;
-    }
-
-    // Arguments are currently in reverse order, flip the list.
-    {
-        Argument *cur = kernel->arguments;
-        kernel->arguments = nullptr;
-        while (cur) {
-            Argument *next = cur->next;
-            cur->next = kernel->arguments;
-            kernel->arguments = cur;
-            cur = next;
-        }
-    }
-
-    return kernel;
-error:
-    free(kernel);
-    return nullptr;
-}
-
-// Delete all data associated with a kernel. Also release associated OpenGL
-// shader and program.
-WEAK void delete_kernel(void *user_context, KernelInfo *kernel) {
-    global_state.DeleteProgram(kernel->program_id);
-#if 0  // TODO figure out why this got deleted.
-    global_state.DeleteShader(kernel->shader_id);
-#endif
-
-    Argument *arg = kernel->arguments;
-    while (arg) {
-        Argument *next = arg->next;
-        free(arg->name);
-        free(arg);
-        arg = next;
-    }
-    free(kernel->source);
-    free(kernel->name);
-    free(kernel);
-}
-
-// Vertices and their order in a triangle strip for rendering a quad
-// ranging from (-1,-1) to (1,1).
-WEAK GLfloat quad_vertices[] = {
-    -1.0f, -1.0f, 1.0f, -1.0f,
-    -1.0f, 1.0f, 1.0f, 1.0f};
-WEAK GLuint quad_indices[] = {0, 1, 2, 3};
-
-WEAK void GlobalState::init() {
-    initialized = false;
-    profile = OpenGL;
-    major_version = 2;
-    minor_version = 0;
-    framebuffer_id = 0;
-    vertex_array_object = vertex_buffer = element_buffer = 0;
-    have_vertex_array_objects = false;
-    have_texture_rg = false;
-    have_texture_rgb8_rgba8 = false;
-    // Initialize all GL function pointers to nullptr
-#define GLFUNC(type, name) name = nullptr;
-    USED_GL_FUNCTIONS;
-    OPTIONAL_GL_FUNCTIONS;
-#undef GLFUNC
-}
-
-WEAK int load_gl_func(void *user_context, const char *name, void **ptr, bool required) {
-    void *p = halide_opengl_get_proc_address(user_context, name);
-    if (!p && required) {
-        error(user_context) << "Could not load function pointer for " << name;
-        return -1;
-    }
-    *ptr = p;
-    return 0;
-}
-
-WEAK bool extension_supported(void *user_context, const char *name) {
-    // Iterate over space delimited extension strings. Note that glGetStringi
-    // is not part of GL ES 2.0, and not reliable in all implementations of
-    // GL ES 3.0.
-    const char *start = (const char *)global_state.GetString(GL_EXTENSIONS);
-    if (!start) {
-        return false;
-    }
-    while (const char *pos = strstr(start, name)) {
-        const char *end = pos + strlen(name);
-        // Ensure the found match is a full word, not a substring.
-        if ((pos == start || pos[-1] == ' ') &&
-            (*end == ' ' || *end == '\0')) {
-            return true;
-        }
-        start = end;
-    }
-
-    return false;
-}
-
-// Check for availability of various version- and extension-specific features
-// and hook up functions pointers as necessary
-WEAK void init_extensions(void *user_context) {
-    if (global_state.major_version >= 3) {  // This is likely valid for both OpenGL and OpenGL ES
-        load_gl_func(user_context, "glGenVertexArrays", (void **)&global_state.GenVertexArrays, false);
-        load_gl_func(user_context, "glBindVertexArray", (void **)&global_state.BindVertexArray, false);
-        load_gl_func(user_context, "glDeleteVertexArrays", (void **)&global_state.DeleteVertexArrays, false);
-        if (global_state.GenVertexArrays && global_state.BindVertexArray && global_state.DeleteVertexArrays) {
-            global_state.have_vertex_array_objects = true;
-        }
-    }
-    load_gl_func(user_context, "glDrawBuffers", (void **)&global_state.DrawBuffers, false);
-
-    global_state.have_texture_rg =
-        global_state.major_version >= 3 ||
-        (global_state.profile == OpenGL &&
-         extension_supported(user_context, "GL_ARB_texture_rg")) ||
-        (global_state.profile == OpenGLES &&
-         extension_supported(user_context, "GL_EXT_texture_rg"));
-
-    global_state.have_texture_rgb8_rgba8 =
-        global_state.major_version >= 3 ||
-        (global_state.profile == OpenGLES &&
-         extension_supported(user_context, "GL_OES_rgb8_rgba8"));
-
-    global_state.have_texture_float =
-        (global_state.major_version >= 3) ||
-        (global_state.profile == OpenGL &&
-         extension_supported(user_context, "GL_ARB_texture_float")) ||
-        (global_state.profile == OpenGLES &&
-         extension_supported(user_context, "GL_OES_texture_float"));
-}
-
-WEAK const char *parse_int(const char *str, int *val) {
-    int v = 0;
-    size_t i = 0;
-    while (str[i] >= '0' && str[i] <= '9') {
-        v = 10 * v + (str[i] - '0');
-        i++;
-    }
-    if (i > 0) {
-        *val = v;
-        return &str[i];
-    }
-    return nullptr;
-}
-
-WEAK const char *parse_opengl_version(const char *str, int *major, int *minor) {
-    str = parse_int(str, major);
-    if (str == nullptr || *str != '.') {
-        return nullptr;
-    }
-    return parse_int(str + 1, minor);
-}
-
-// Initialize the OpenGL-specific parts of the runtime.
-//
-// Calls halide_opengl_create_context, resolves the GL entry points listed in
-// USED_GL_FUNCTIONS, detects the profile/version and extensions, and creates
-// the shared framebuffer, vertex/element buffers and (when supported) the
-// vertex array object. Returns 0 on success or when already initialized, and
-// a non-zero value on failure.
-WEAK int halide_opengl_init(void *user_context) {
-    if (global_state.initialized) {
-        return 0;
-    }
-
-#ifdef DEBUG_RUNTIME
-    halide_start_clock(user_context);
-#endif
-
-    global_state.init();
-
-    // Make a context if there isn't one
-    if (halide_opengl_create_context(user_context)) {
-        error(user_context) << "Failed to make OpenGL context";
-        return -1;
-    }
-
-    // Initialize pointers to core OpenGL functions.
-#define GLFUNC(TYPE, VAR)                                                              \
-    if (load_gl_func(user_context, "gl" #VAR, (void **)&global_state.VAR, true) < 0) { \
-        return -1;                                                                     \
-    }
-    USED_GL_FUNCTIONS;
-#undef GLFUNC
-
-    // glGetString returns null when an error occurs, so every use of the
-    // version string is guarded.
-    const char *version = (const char *)global_state.GetString(GL_VERSION);
-    const char *gles_version = version ? match_prefix(version, "OpenGL ES ") : nullptr;
-    int major = 0, minor = 0;
-    if (gles_version && parse_opengl_version(gles_version, &major, &minor)) {
-        global_state.profile = OpenGLES;
-        global_state.major_version = major;
-        global_state.minor_version = minor;
-    } else if (version && parse_opengl_version(version, &major, &minor)) {
-        global_state.profile = OpenGL;
-        global_state.major_version = major;
-        global_state.minor_version = minor;
-    } else {
-        // Missing or unparseable version string: fall back to desktop OpenGL 2.0.
-        global_state.profile = OpenGL;
-        global_state.major_version = 2;
-        global_state.minor_version = 0;
-    }
-    init_extensions(user_context);
-    // Print the version stored in global_state: the locals above do not hold
-    // the effective version when the fallback branch was taken.
-    debug(user_context)
-        << "Halide running on OpenGL " << ((global_state.profile == OpenGL) ? "" : "ES ") << global_state.major_version << "." << global_state.minor_version << "\n"
-        << "  vertex_array_objects: " << (global_state.have_vertex_array_objects ? "yes\n" : "no\n")
-        << "  texture_rg: " << (global_state.have_texture_rg ? "yes\n" : "no\n")
-        << "  have_texture_rgb8_rgba8: " << (global_state.have_texture_rgb8_rgba8 ? "yes\n" : "no\n")
-        << "  texture_float: " << (global_state.have_texture_float ? "yes\n" : "no\n");
-
-    // Initialize framebuffer.
-    global_state.GenFramebuffers(1, &global_state.framebuffer_id);
-    if (global_state.CheckAndReportError(user_context, "halide_opengl_init GenFramebuffers")) {
-        return 1;
-    }
-
-    // Initialize vertex and element buffers.
-    GLuint buf[2];
-    global_state.GenBuffers(2, buf);
-    global_state.BindBuffer(GL_ARRAY_BUFFER, buf[0]);
-    global_state.BufferData(GL_ARRAY_BUFFER, sizeof(quad_vertices), quad_vertices, GL_STATIC_DRAW);
-    global_state.BindBuffer(GL_ARRAY_BUFFER, 0);
-    global_state.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, buf[1]);
-    global_state.BufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(quad_indices), quad_indices, GL_STATIC_DRAW);
-    global_state.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
-    global_state.vertex_buffer = buf[0];
-    global_state.element_buffer = buf[1];
-
-    if (global_state.have_vertex_array_objects) {
-        global_state.GenVertexArrays(1, &global_state.vertex_array_object);
-        if (global_state.CheckAndReportError(user_context, "halide_opengl_init GenVertexArrays")) {
-            return 1;
-        }
-    }
-
-    global_state.initialized = true;
-    return 0;
-}
-
-// Release all data allocated by the runtime.
-//
-// The host application normally owns the OpenGL context, so the context
-// itself is left untouched. Always returns 0.
-WEAK int halide_opengl_device_release(void *user_context) {
-    if (global_state.initialized) {
-        debug(user_context) << "halide_opengl_release\n";
-        global_state.DeleteFramebuffers(1, &global_state.framebuffer_id);
-
-        // Destroy each kernel but keep the ModuleState nodes themselves:
-        // code generated by Halide still references them (see
-        // CodeGen_GPU_Host::get_module_state), so freeing them would leave
-        // dangling pointers.
-        for (ModuleState *entry = state_list; entry != nullptr; entry = entry->next) {
-            delete_kernel(user_context, entry->kernel);
-            entry->kernel = nullptr;
-        }
-
-        global_state.DeleteBuffers(1, &global_state.vertex_buffer);
-        global_state.DeleteBuffers(1, &global_state.element_buffer);
-        if (global_state.have_vertex_array_objects) {
-            global_state.DeleteVertexArrays(1, &global_state.vertex_array_object);
-        }
-
-        // Return to the default-constructed (uninitialized) state.
-        global_state = GlobalState();
-    }
-
-    return 0;
-}
-
-// Determine OpenGL texture format and channel type for a given halide_buffer_t.
-// On success stores the results in *internal_format, *format and *type and
-// returns true; otherwise reports an error and returns false.
-WEAK bool get_texture_format(void *user_context, halide_buffer_t *buf,
-                             GLint *internal_format, GLint *format, GLint *type) {
-    // Map the element type onto a GL channel type.
-    if (buf->type == halide_type_of<float>()) {
-        *type = GL_FLOAT;
-    } else if (buf->type == halide_type_of<uint16_t>()) {
-        *type = GL_UNSIGNED_SHORT;
-    } else if (buf->type == halide_type_of<uint8_t>()) {
-        *type = GL_UNSIGNED_BYTE;
-    } else {
-        error(user_context) << "OpenGL: Only uint8, uint16, and float textures are supported.";
-        return false;
-    }
-
-    // Buffers without a third dimension are reported as having zero channels.
-    int channel_count = 0;
-    if (buf->dimensions > 2) {
-        channel_count = buf->dim[2].extent;
-    }
-
-    // GL_LUMINANCE and GL_LUMINANCE_ALPHA are not color-renderable in ES2, so
-    // they cannot be read back with ReadPixels and are of little use here.
-    // GL_RED and GL_RG are optional in ES2 (required in ES3) but in practice
-    // are available on nearly every recent device (iOS: iPhone 4s and later;
-    // Android: 4.3 and later plus various older devices). This is suboptimal;
-    // the only real alternative would be to store such data as GL_RGB or
-    // GL_RGBA and ignore the extra channels.
-    if (!global_state.have_texture_rg && channel_count <= 2) {
-        error(user_context) << "OpenGL: 1 and 2 channel textures are not supported for this version of OpenGL.";
-        return false;
-    }
-
-    // Pick one of the formats common to GLES 2.0 and GL 2.1.
-    if (channel_count == 0 || channel_count == 1) {
-        *format = GL_RED;
-    } else if (channel_count == 2) {
-        *format = GL_RG;
-    } else if (channel_count == 3) {
-        *format = GL_RGB;
-    } else if (channel_count == 4) {
-        *format = GL_RGBA;
-    } else {
-        error(user_context) << "OpenGL: Invalid number of color channels: " << channel_count;
-        return false;
-    }
-
-    if (global_state.profile == OpenGL && *type == GL_FLOAT) {
-        // On desktop OpenGL the internal format specifier carries the precise
-        // data type, see ARB_texture_float.
-        switch (*format) {
-        case GL_RED:
-        case GL_RG:
-        case GL_RGB:
-        case GL_RGBA:
-            *internal_format = GL_RGBA32F;
-            break;
-        default:
-            error(user_context) << "OpenGL: Cannot select internal format for format " << *format;
-            return false;
-        }
-    } else if (global_state.profile == OpenGLES || global_state.profile == OpenGL) {
-        // On OpenGL ES the texture format must match the pixel format because
-        // no conversion happens during texture transfers (see
-        // OES_texture_float); non-float desktop textures use the same rule.
-        *internal_format = *format;
-    }
-
-    return true;
-}
-
-// This function returns the width, height and number of color channels that the
-// texture for the specified halide_buffer_t will contain. It provides a single place
-// to implement the logic mapping dimensions the buffer does not have to one element.
-//
-// Returns false (after reporting an error) when the buffer has more than three
-// dimensions or when dim[0].extent is zero; otherwise writes *width, *height
-// and *channels and returns true.
-WEAK bool get_texture_dimensions(void *user_context, halide_buffer_t *buf, GLint *width,
-                                 GLint *height, GLint *channels) {
-    if (buf->dimensions > 3) {
-        error(user_context) << "The GL backend supports buffers of at most 3 dimensions\n";
-        return false;
-    }
-
-    // A zero-width buffer is rejected rather than snapped to one element.
-    *width = buf->dim[0].extent;
-    if (*width == 0) {
-        error(user_context) << "Invalid dim[0].extent: " << *width << "\n";
-        return false;
-    }
-
-    // GLES 2.0 supports GL_TEXTURE_2D (plus cube map), but not 1d or 3d. If the
-    // buffer has no second or third dimension, the corresponding size is set
-    // to one. An extent of zero in dim[1] or dim[2] is passed through as-is.
-    *height = (buf->dimensions > 1) ? buf->dim[1].extent : 1;
-    *channels = (buf->dimensions > 2) ? buf->dim[2].extent : 1;
-
-    return true;
-}
-
-// Allocate a new texture matching the dimension and color format of the
-// specified buffer.
-//
-// If buf->device is already set, no texture is allocated; on HAVE_GLES3
-// builds the existing texture's size is checked against the buffer.
-// Otherwise a texture is created, configured (nearest filtering, clamp to
-// edge), sized with TexImage2D and stored in buf->device.
-// Returns 0 on success and non-zero on failure.
-WEAK int halide_opengl_device_malloc(void *user_context, halide_buffer_t *buf) {
-    if (int init_result = halide_opengl_init(user_context)) {
-        return init_result;
-    }
-
-    if (!buf) {
-        error(user_context) << "Invalid buffer";
-        return 1;
-    }
-
-    if (buf->device) {
-        // The texture was already created by the host application; only
-        // verify its size where the GL version allows querying it.
-#ifdef HAVE_GLES3
-        const uint64_t handle = buf->device;
-        // The render-target sentinel maps to texture name 0, so there is no
-        // texture object whose size could be queried.
-        if (handle != HALIDE_OPENGL_RENDER_TARGET) {
-            // Look up the width and the height from the existing texture. Note that
-            // glGetTexLevelParameteriv does not support GL_TEXTURE_WIDTH or
-            // GL_TEXTURE_HEIGHT in GLES 2.0
-            const GLuint existing_tex = (GLuint)handle;
-            GLint width = 0, height = 0;
-            global_state.BindTexture(GL_TEXTURE_2D, existing_tex);
-            global_state.GetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &width);
-            global_state.GetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &height);
-            if (global_state.CheckAndReportError(user_context, "halide_opengl_device_malloc binding texture (GLES3)")) {
-                return 1;
-            }
-            // dim[1] only exists for buffers with at least two dimensions.
-            const int buf_width = buf->dim[0].extent;
-            const int buf_height = (buf->dimensions > 1) ? buf->dim[1].extent : 1;
-            if (width < buf_width || height < buf_height) {
-                error(user_context)
-                    << "Existing texture is smaller than buffer. "
-                    << "Texture size: " << width << "x" << height
-                    << ", buffer size: " << buf_width << "x" << buf_height;
-                return 1;
-            }
-        }
-#endif
-        return 0;
-    }
-
-    if (buf->dimensions > 3) {
-        error(user_context) << "high-dimensional textures are not supported";
-        return 1;
-    }
-
-    // Generate texture ID
-    GLuint tex = 0;
-    global_state.GenTextures(1, &tex);
-    if (global_state.CheckAndReportError(user_context, "halide_opengl_device_malloc GenTextures")) {
-        global_state.DeleteTextures(1, &tex);
-        return 1;
-    }
-
-    // Set parameters for this texture: no interpolation and clamp to edges.
-    global_state.BindTexture(GL_TEXTURE_2D, tex);
-    global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-    global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-    global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-    if (global_state.CheckAndReportError(user_context, "halide_opengl_device_malloc binding texture")) {
-        global_state.DeleteTextures(1, &tex);
-        return 1;
-    }
-
-    // Create empty texture here and fill it with glTexSubImage2D later.
-    GLint internal_format, format, type;
-    if (!get_texture_format(user_context, buf, &internal_format, &format, &type)) {
-        error(user_context) << "Invalid texture format";
-        global_state.DeleteTextures(1, &tex);
-        return 1;
-    }
-
-    GLint width, height, channels;
-    if (!get_texture_dimensions(user_context, buf, &width, &height, &channels)) {
-        error(user_context) << "Invalid texture dimensions";
-        // Release the texture generated above so it does not leak.
-        global_state.DeleteTextures(1, &tex);
-        return 1;
-    }
-
-    global_state.TexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, type, nullptr);
-    if (global_state.CheckAndReportError(user_context, "halide_opengl_device_malloc TexImage2D")) {
-        global_state.DeleteTextures(1, &tex);
-        return 1;
-    }
-
-    buf->device = tex;
-    buf->device_interface = &opengl_device_interface;
-    buf->device_interface->impl->use_module();
-    debug(user_context) << "Allocated texture " << tex
-                        << " of size " << width << " x " << height << "\n";
-
-    global_state.BindTexture(GL_TEXTURE_2D, 0);
-
-    return 0;
-}
-
-// Delete all texture information associated with a buffer.
-// Returns 0 on success (including when the buffer has no device handle) and
-// 1 when the runtime is not initialized or GL reports an error.
-WEAK int halide_opengl_device_free(void *user_context, halide_buffer_t *buf) {
-    if (!global_state.initialized) {
-        error(user_context) << "OpenGL runtime not initialized in call to halide_opengl_device_free.";
-        return 1;
-    }
-
-    const uint64_t handle = buf->device;
-    if (handle == 0) {
-        // Nothing was allocated for this buffer.
-        return 0;
-    }
-
-    // The render-target sentinel is translated to texture name 0.
-    GLuint tex = 0;
-    if (handle != HALIDE_OPENGL_RENDER_TARGET) {
-        tex = (GLuint)handle;
-    }
-
-    debug(user_context) << "halide_opengl_device_free: Deleting texture " << tex << "\n";
-    global_state.DeleteTextures(1, &tex);
-    // Do not bail out on failure: the interface and device fields are cleared
-    // even if the texture could not be deleted.
-    const int result = global_state.CheckAndReportError(user_context, "halide_opengl_device_free DeleteTextures") ? 1 : 0;
-
-    buf->device = 0;
-    buf->device_interface->impl->release_module();
-    buf->device_interface = nullptr;
-
-    return result;
-}
-
-// std::min and std::max are unavailable in the Halide runtime, so minimal
-// replacements are provided here.
-
-// Returns b unless a is strictly smaller.
-template<typename T>
-ALWAYS_INLINE T std_min(T a, T b) {
-    if (a < b) {
-        return a;
-    }
-    return b;
-}
-
-// Returns b unless a is strictly larger.
-template<typename T>
-ALWAYS_INLINE T std_max(T a, T b) {
-    if (a > b) {
-        return a;
-    }
-    return b;
-}
-
-// Copies image data from the strided layout described by the halide_buffer_t
-// into the packed, channel-interleaved layout GL expects. src and dst are
-// assumed to have the same number of channels. Dimensions the buffer does not
-// have are treated as extent 1 with stride 0.
-template<class T>
-ALWAYS_INLINE void halide_to_interleaved(const halide_buffer_t *src_buf, T *dst) {
-    const T *src = reinterpret_cast<const T *>(src_buf->host);
-    const int dims = src_buf->dimensions;
-    const int width = (dims > 0) ? src_buf->dim[0].extent : 1;
-    const int x_stride = (dims > 0) ? src_buf->dim[0].stride : 0;
-    const int height = (dims > 1) ? src_buf->dim[1].extent : 1;
-    const int y_stride = (dims > 1) ? src_buf->dim[1].stride : 0;
-    const int channels = (dims > 2) ? src_buf->dim[2].extent : 1;
-    const int c_stride = (dims > 2) ? src_buf->dim[2].stride : 0;
-
-    // The destination is written strictly sequentially, so a single running
-    // pointer replaces the per-row index computation.
-    T *out = dst;
-    for (int y = 0; y < height; y++) {
-        const int row_base = y * y_stride;
-        for (int x = 0; x < width; x++) {
-            int srcidx = row_base + x * x_stride;
-            for (int c = 0; c < channels; c++, srcidx += c_stride) {
-                *out++ = src[srcidx];
-            }
-        }
-    }
-}
-
-// Copies image data from the packed, channel-interleaved layout used by GL
-// into the strided layout described by the halide_buffer_t. When src has
-// fewer channels than dst, the extra dst channels are left untouched; when
-// src has more, the surplus src channels are skipped.
-template<class T>
-ALWAYS_INLINE void interleaved_to_halide(void *user_context, const T *src, int src_channels, halide_buffer_t *dst_buf) {
-    T *dst = reinterpret_cast<T *>(dst_buf->host);
-    const int dims = dst_buf->dimensions;
-    const int width = (dims > 0) ? dst_buf->dim[0].extent : 1;
-    const int x_stride = (dims > 0) ? dst_buf->dim[0].stride : 0;
-    const int height = (dims > 1) ? dst_buf->dim[1].extent : 1;
-    const int y_stride = (dims > 1) ? dst_buf->dim[1].stride : 0;
-    const int dst_channels = (dims > 2) ? dst_buf->dim[2].extent : 1;
-    const int c_stride = (dims > 2) ? dst_buf->dim[2].stride : 0;
-
-    // Channels actually copied per pixel, and source channels dropped after them.
-    const int channels = (src_channels < dst_channels) ? src_channels : dst_channels;
-    const int src_skip = src_channels - channels;
-
-    // Each pixel consumes exactly src_channels source elements, so the source
-    // can be walked with one running pointer.
-    const T *in = src;
-    for (int y = 0; y < height; y++) {
-        const int row_base = y * y_stride;
-        for (int x = 0; x < width; x++) {
-            int dstidx = row_base + x * x_stride;
-            for (int c = 0; c < channels; c++, dstidx += c_stride) {
-                dst[dstidx] = *in++;
-            }
-            in += src_skip;
-        }
-    }
-}
-
-// Copy image data from host memory to texture.
-//
-// Ensures a texture exists (via halide_opengl_device_malloc), then uploads the
-// host data with TexSubImage2D. Data that is already packed and interleaved is
-// uploaded directly; any other layout is first repacked into a temporary
-// buffer. Returns 0 on success and non-zero on failure.
-WEAK int halide_opengl_copy_to_device(void *user_context, halide_buffer_t *buf) {
-    if (!global_state.initialized) {
-        error(user_context) << "OpenGL runtime not initialized (halide_opengl_copy_to_device).";
-        return 1;
-    }
-
-    GLStateSaver state_saver;
-
-    int err = halide_opengl_device_malloc(user_context, buf);
-    if (err) {
-        return err;
-    }
-
-    if (!buf->host || !buf->device) {
-        debug_buffer(user_context, buf);
-        error(user_context) << "Invalid copy_to_device operation: host or device nullptr";
-        return 1;
-    }
-
-    uint64_t handle = buf->device;
-    if (handle == HALIDE_OPENGL_RENDER_TARGET) {
-        // TODO: this isn't correct; we want to ensure we copy to the current render_target.
-        debug(user_context) << "halide_opengl_copy_to_device: called for HALIDE_OPENGL_RENDER_TARGET\n";
-        return 0;
-    }
-    GLuint tex = (GLuint)handle;
-    debug(user_context) << "halide_opengl_copy_to_device: " << tex << "\n";
-
-    global_state.BindTexture(GL_TEXTURE_2D, tex);
-    if (global_state.CheckAndReportError(user_context, "halide_opengl_copy_to_device BindTexture")) {
-        return 1;
-    }
-    GLint internal_format, format, type;
-    if (!get_texture_format(user_context, buf, &internal_format, &format, &type)) {
-        error(user_context) << "Invalid texture format";
-        return 1;
-    }
-
-    GLint width, height, buffer_channels;
-    if (!get_texture_dimensions(user_context, buf, &width, &height, &buffer_channels)) {
-        error(user_context) << "Invalid texture dimensions";
-        return 1;
-    }
-
-    // To use TexSubImage2D directly, the colors must be stored interleaved
-    // and rows must be stored consecutively.
-    // A single-channel buffer counts as interleaved only when its pixels are
-    // contiguous in x. buffer_channels != 1 implies the buffer has a third
-    // dimension, so dim[2] is only read when it exists.
-    const bool is_interleaved =
-        (buffer_channels == 1) ?
-            (buf->dim[0].stride == 1) :
-            (buf->dim[2].stride == 1 && buf->dim[0].stride == buf->dim[2].extent);
-    // dim[1] only exists for buffers with at least two dimensions; a single
-    // row is trivially packed.
-    const bool is_packed =
-        (buf->dimensions < 2) ||
-        (buf->dim[1].stride == buf->dim[0].extent * buf->dim[0].stride);
-    if (is_interleaved && is_packed) {
-        global_state.PixelStorei(GL_UNPACK_ALIGNMENT, 1);
-        global_state.TexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, type, buf->host);
-        if (global_state.CheckAndReportError(user_context, "halide_opengl_copy_to_device TexSubImage2D(1)")) {
-            return 1;
-        }
-    } else {
-        debug(user_context)
-            << "Warning: In copy_to_device, host buffer is not interleaved. Doing slow interleave.\n";
-
-        // Widen before multiplying so large textures do not overflow int.
-        size_t texture_size = (size_t)width * (size_t)height * (size_t)buffer_channels * buf->type.bytes();
-        HalideMalloc tmp(user_context, texture_size);
-        if (!tmp.ptr) {
-            error(user_context) << "halide_malloc failed inside copy_to_device";
-            return -1;
-        }
-
-        switch (type) {
-        case GL_UNSIGNED_BYTE:
-            halide_to_interleaved<uint8_t>(buf, (uint8_t *)tmp.ptr);
-            break;
-        case GL_UNSIGNED_SHORT:
-            halide_to_interleaved<uint16_t>(buf, (uint16_t *)tmp.ptr);
-            break;
-        case GL_FLOAT:
-            halide_to_interleaved<float>(buf, (float *)tmp.ptr);
-            break;
-        }
-
-        global_state.PixelStorei(GL_UNPACK_ALIGNMENT, 1);
-        global_state.TexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, type, tmp.ptr);
-        if (global_state.CheckAndReportError(user_context, "halide_opengl_copy_to_device TexSubImage2D(2)")) {
-            return 1;
-        }
-    }
-
-    return 0;
-}
-
-// Copy image data from texture back to host memory.
-//
-// Attaches the texture to the runtime's framebuffer (unless the buffer is the
-// HALIDE_OPENGL_RENDER_TARGET sentinel) and reads it back with ReadPixels.
-// Data is read straight into the host buffer when that buffer is packed,
-// interleaved and has the same channel count as the read format; otherwise it
-// is read into a temporary buffer and deinterleaved.
-// Returns 0 on success and non-zero on failure.
-WEAK int halide_opengl_copy_to_host(void *user_context, halide_buffer_t *buf) {
-    if (!global_state.initialized) {
-        error(user_context) << "OpenGL runtime not initialized (halide_opengl_copy_to_host).";
-        return 1;
-    }
-
-    GLStateSaver state_saver;
-
-    if (!buf->host || !buf->device) {
-        debug_buffer(user_context, buf);
-        error(user_context) << "Invalid copy_to_host operation: host or dev nullptr";
-        return 1;
-    }
-
-    GLint internal_format, format, type;
-    if (!get_texture_format(user_context, buf, &internal_format, &format, &type)) {
-        error(user_context) << "Invalid texture format";
-        return 1;
-    }
-
-    GLint width, height, buffer_channels;
-    if (!get_texture_dimensions(user_context, buf, &width, &height, &buffer_channels)) {
-        error(user_context) << "Invalid texture dimensions";
-        return 1;
-    }
-    GLint texture_channels = buffer_channels;
-
-    uint64_t handle = buf->device;
-    if (handle != HALIDE_OPENGL_RENDER_TARGET) {
-        GLuint tex = (GLuint)handle;
-        debug(user_context) << "halide_copy_to_host: texture " << tex << "\n";
-        global_state.BindFramebuffer(GL_FRAMEBUFFER, global_state.framebuffer_id);
-        if (global_state.CheckAndReportError(user_context, "copy_to_host BindFramebuffer")) {
-            return 1;
-        }
-        global_state.FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex, 0);
-        if (global_state.CheckAndReportError(user_context, "copy_to_host FramebufferTexture2D")) {
-            return 1;
-        }
-    } else {
-        debug(user_context) << "halide_copy_to_host: HALIDE_OPENGL_RENDER_TARGET\n";
-    }
-
-    // Check that framebuffer is set up correctly
-    GLenum status = global_state.CheckFramebufferStatus(GL_FRAMEBUFFER);
-    if (status != GL_FRAMEBUFFER_COMPLETE) {
-        error(user_context)
-            << "Setting up GL framebuffer " << global_state.framebuffer_id << " failed " << status;
-        return 1;
-    }
-
-    // The only format/type pairs guaranteed to be readable in GLES2 are GL_RGBA+GL_UNSIGNED_BYTE,
-    // plus one other implementation-dependent pair specified here. Spoiler alert:
-    // some ES2 implementations return that very same pair here (i.e., they don't support
-    // any other formats); in that case, we need to read as RGBA and manually convert to
-    // what we need (usually GL_RGB).
-    // NOTE: this requires the currently-bound Framebuffer is correct.
-    // TODO: short and float will require even more effort on top of this.
-    if (global_state.profile == OpenGLES && format == GL_RGB) {
-        GLint extra_format, extra_type;
-        global_state.GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE, &extra_type);
-        if (type != GL_UNSIGNED_BYTE && type != extra_type) {
-            error(user_context) << "ReadPixels does not support our type; we don't handle this yet.\n";
-            return 1;
-        }
-        global_state.GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &extra_format);
-        if (format != GL_RGBA && format != extra_format) {
-            debug(user_context) << "ReadPixels does not support our format; falling back to GL_RGBA\n";
-            format = GL_RGBA;
-            texture_channels = 4;
-        }
-    }
-
-    // To download the texture directly, the colors must be stored interleaved
-    // and rows must be stored consecutively.
-    // A single-channel buffer counts as interleaved only when its pixels are
-    // contiguous in x. buffer_channels != 1 implies the buffer has a third
-    // dimension, so dim[2] is only read when it exists.
-    const bool is_interleaved =
-        (buffer_channels == 1) ?
-            (buf->dim[0].stride == 1) :
-            (buf->dim[2].stride == 1 && buf->dim[0].stride == buf->dim[2].extent);
-    // dim[1] only exists for buffers with at least two dimensions; a single
-    // row is trivially packed.
-    const bool is_packed =
-        (buf->dimensions < 2) ||
-        (buf->dim[1].stride == buf->dim[0].extent * buf->dim[0].stride);
-    if (is_interleaved && is_packed && texture_channels == buffer_channels) {
-        global_state.PixelStorei(GL_PACK_ALIGNMENT, 1);
-#ifdef DEBUG_RUNTIME
-        int64_t t1 = halide_current_time_ns(user_context);
-#endif
-        // Use the validated width/height rather than reading dim[1] directly,
-        // which does not exist for one-dimensional buffers.
-        global_state.ReadPixels(0, 0, width, height, format, type, buf->host);
-#ifdef DEBUG_RUNTIME
-        int64_t t2 = halide_current_time_ns(user_context);
-#endif
-        if (global_state.CheckAndReportError(user_context, "copy_to_host ReadPixels (1)")) {
-            return 1;
-        }
-#ifdef DEBUG_RUNTIME
-        debug(user_context) << "ReadPixels(1) time: " << (t2 - t1) / 1e3 << "usec\n";
-#endif
-    } else {
-        debug(user_context)
-            << "Warning: In copy_to_host, host buffer is not interleaved, or not a native format. Doing slow deinterleave.\n";
-
-        // Widen before multiplying so large textures do not overflow int.
-        size_t texture_size = (size_t)width * (size_t)height * (size_t)texture_channels * buf->type.bytes();
-        HalideMalloc tmp(user_context, texture_size);
-        if (!tmp.ptr) {
-            error(user_context) << "halide_malloc failed inside copy_to_host";
-            return -1;
-        }
-
-        global_state.PixelStorei(GL_PACK_ALIGNMENT, 1);
-#ifdef DEBUG_RUNTIME
-        int64_t t1 = halide_current_time_ns(user_context);
-#endif
-        global_state.ReadPixels(0, 0, width, height, format, type, tmp.ptr);
-#ifdef DEBUG_RUNTIME
-        int64_t t2 = halide_current_time_ns(user_context);
-        debug(user_context) << "ReadPixels(2) time: " << (t2 - t1) / 1e3 << "usec\n";
-#endif
-        if (global_state.CheckAndReportError(user_context, "copy_to_host ReadPixels (2)")) {
-            return 1;
-        }
-
-        // Premature optimization warning: interleaved_to_halide() could definitely
-        // be optimized, but ReadPixels() typically takes ~2-10x as long (especially on
-        // mobile devices), so the returns will be modest.
-#ifdef DEBUG_RUNTIME
-        int64_t t3 = halide_current_time_ns(user_context);
-#endif
-        switch (type) {
-        case GL_UNSIGNED_BYTE:
-            interleaved_to_halide<uint8_t>(user_context, (uint8_t *)tmp.ptr, texture_channels, buf);
-            break;
-        case GL_UNSIGNED_SHORT:
-            interleaved_to_halide<uint16_t>(user_context, (uint16_t *)tmp.ptr, texture_channels, buf);
-            break;
-        case GL_FLOAT:
-            interleaved_to_halide<float>(user_context, (float *)tmp.ptr, texture_channels, buf);
-            break;
-        }
-#ifdef DEBUG_RUNTIME
-        int64_t t4 = halide_current_time_ns(user_context);
-        debug(user_context) << "deinterleave time: " << (t4 - t3) / 1e3 << "usec\n";
-#endif
-    }
-
-    return 0;
-}
-
-}  // namespace OpenGL
-}  // namespace Internal
-}  // namespace Runtime
-}  // namespace Halide
-
-using namespace Halide::Runtime::Internal::OpenGL;
-
-// Look up the module whose kernel is named stage_name.
-// Returns nullptr when no module in state_list has a matching kernel.
-// TODO: This is a linear scan over all GLSL'd stages and can be optimized.
-WEAK ModuleState *find_module(const char *stage_name) {
-    for (ModuleState *candidate = state_list; candidate != nullptr; candidate = candidate->next) {
-        // Modules without a kernel are skipped.
-        KernelInfo *info = candidate->kernel;
-        if (info != nullptr && strcmp(stage_name, info->name) == 0) {
-            return candidate;
-        }
-    }
-
-    return nullptr;
-}
-
-//  Create wrappers that satisfy old naming conventions
-
-extern "C" {
-
-WEAK int halide_opengl_run(void *user_context,
-                           void *state_ptr,
-                           const char *entry_name,
-                           int blocksX, int blocksY, int blocksZ,
-                           int threadsX, int threadsY, int threadsZ,
-                           int shared_mem_bytes,
-                           size_t arg_sizes[], void *args[], int8_t is_buffer[],
-                           int num_padded_attributes,
-                           float *vertex_buffer,
-                           int num_coords_dim0,
-                           int num_coords_dim1) {
-    if (!global_state.initialized) {
-        error(user_context) << "OpenGL runtime not initialized (halide_opengl_run).";
-        return 1;
-    }
-
-    GLStateSaver state_saver;
-
-    // Find the right module
-    ModuleState *mod = find_module(entry_name);
-    if (!mod) {
-        error(user_context) << "Internal error: module state for stage " << entry_name << " not found\n";
-        return 1;
-    }
-
-    KernelInfo *kernel = mod->kernel;
-
-    global_state.UseProgram(kernel->program_id);
-    if (global_state.CheckAndReportError(user_context, "halide_opengl_run UseProgram")) {
-        return 1;
-    }
-
-    // TODO(abstephensg) it would be great to codegen these vec4 uniform buffers
-    // directly, instead of passing an array of arguments and then copying them
-    // out at runtime.
-
-    // Determine the number of float and int uniform parameters. This code
-    // follows the argument packing convention in CodeGen_GPU_Host and
-    // CodeGen_OpenGL_Dev
-    int num_uniform_floats = 0;
-    int num_uniform_ints = 0;
-
-    Argument *kernel_arg = kernel->arguments;
-    for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) {
-
-        // Check for a mismatch between the number of arguments declared in the
-        // fragment shader source header and the number passed to this function
-        if (!kernel_arg) {
-            error(user_context)
-                << "Too many arguments passed to halide_opengl_run\n"
-                << "Argument " << i << ": size=" << i << " value=" << args[i];
-            return 1;
-        }
-
-        // Count the number of float and int uniform parameters.
-        if (kernel_arg->kind == Argument::Uniform) {
-            switch (kernel_arg->type) {
-            case Argument::Float:
-            // Integer parameters less than 32 bits wide are passed as
-            // normalized float values
-            case Argument::Int8:
-            case Argument::UInt8:
-            case Argument::Int16:
-            case Argument::UInt16:
-                ++num_uniform_floats;
-                break;
-            case Argument::Bool:
-            case Argument::Int32:
-            case Argument::UInt32:
-                ++num_uniform_ints;
-                break;
-            default:
-                error(user_context) << "GLSL: Encountered invalid kernel argument type";
-                return 1;
-            }
-        }
-    }
-
-    // Pad up to a multiple of four
-    int num_padded_uniform_floats = (num_uniform_floats + 0x3) & ~0x3;
-    int num_padded_uniform_ints = (num_uniform_ints + 0x3) & ~0x3;
-
-    // Allocate storage for the packed arguments
-    float uniform_float[num_padded_uniform_floats];
-    int uniform_int[num_padded_uniform_ints];
-
-    bool bind_render_targets = true;
-
-    // Copy input arguments to corresponding GLSL uniforms.
-    GLint num_active_textures = 0;
-    int uniform_float_idx = 0;
-    int uniform_int_idx = 0;
-
-    kernel_arg = kernel->arguments;
-    for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) {
-
-        if (kernel_arg->kind == Argument::Outbuf) {
-            halide_assert(user_context, is_buffer[i] && "OpenGL Outbuf argument is not a buffer.");
-            // Check if the output buffer will be bound by the client instead of
-            // the Halide runtime
-            uint64_t handle = ((halide_buffer_t *)args[i])->device;
-            if (!handle) {
-                error(user_context) << "GLSL: Encountered invalid nullptr dev pointer";
-                return 1;
-            }
-            if (handle == HALIDE_OPENGL_RENDER_TARGET) {
-                bind_render_targets = false;
-            }
-            // Outbuf textures are handled explicitly below
-            continue;
-        } else if (kernel_arg->kind == Argument::Inbuf) {
-            halide_assert(user_context, is_buffer[i] && "OpenGL Inbuf argument is not a buffer.");
-            GLint loc =
-                global_state.GetUniformLocation(kernel->program_id, kernel_arg->name);
-            if (global_state.CheckAndReportError(user_context, "halide_opengl_run GetUniformLocation(InBuf)")) {
-                return 1;
-            }
-            if (loc == -1) {
-                error(user_context) << "No sampler defined for input texture.";
-                return 1;
-            }
-            uint64_t handle = ((halide_buffer_t *)args[i])->device;
-            if (!handle) {
-                error(user_context) << "GLSL: Encountered invalid nullptr dev pointer";
-                return 1;
-            }
-            global_state.ActiveTexture(GL_TEXTURE0 + num_active_textures);
-            global_state.BindTexture(GL_TEXTURE_2D, handle == HALIDE_OPENGL_RENDER_TARGET ? 0 : (GLuint)handle);
-            global_state.Uniform1iv(loc, 1, &num_active_textures);
-
-            // Textures not created by the Halide runtime might not have
-            // parameters set, or might have had parameters set differently
-            global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-            global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-            global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-            global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-
-            num_active_textures++;
-            // TODO: check maximum number of active textures
-        } else if (kernel_arg->kind == Argument::Uniform) {
-            // Copy the uniform parameter into the packed scalar list
-            // corresponding to its type.
-
-            // Note: small integers are represented as floats in GLSL.
-            switch (kernel_arg->type) {
-            case Argument::Float:
-                uniform_float[uniform_float_idx++] = *(float *)args[i];
-                break;
-            case Argument::Bool:
-                uniform_int[uniform_int_idx++] = *((bool *)args[i]) ? 1 : 0;
-                break;
-            case Argument::Int8:
-                uniform_float[uniform_float_idx++] = *((int8_t *)args[i]);
-                break;
-            case Argument::UInt8:
-                uniform_float[uniform_float_idx++] = *((uint8_t *)args[i]);
-                break;
-            case Argument::Int16: {
-                uniform_float[uniform_float_idx++] = *((int16_t *)args[i]);
-                break;
-            }
-            case Argument::UInt16: {
-                uniform_float[uniform_float_idx++] = *((uint16_t *)args[i]);
-                break;
-            }
-            case Argument::Int32: {
-                uniform_int[uniform_int_idx++] = *((int32_t *)args[i]);
-                break;
-            }
-            case Argument::UInt32: {
-                uint32_t value = *((uint32_t *)args[i]);
-                if (value > 0x7fffffff) {
-                    error(user_context)
-                        << "OpenGL: argument '" << kernel_arg->name << "' is too large for GLint";
-                    return -1;
-                }
-                uniform_int[uniform_int_idx++] = static_cast<GLint>(value);
-                break;
-            }
-            case Argument::Void:
-                error(user_context) << "OpenGL: Encountered invalid kernel argument type";
-                return 1;
-            }
-        }
-    }
-
-    if (kernel_arg) {
-        error(user_context) << "Too few arguments passed to halide_opengl_run";
-        return 1;
-    }
-
-    // Set the packed uniform int parameters
-    for (int idx = 0; idx != num_padded_uniform_ints; idx += 4) {
-
-        // Produce the uniform parameter name without using the std library.
-        Printer<StringStreamPrinter, 16> name(user_context);
-        name << "_uniformi" << (idx / 4);
-
-        GLint loc = global_state.GetUniformLocation(kernel->program_id, name.str());
-        if (global_state.CheckAndReportError(user_context, "halide_opengl_run GetUniformLocation")) {
-            return 1;
-        }
-        if (loc == -1) {
-            // Argument was probably optimized away by GLSL compiler.
-            continue;
-        }
-
-        global_state.Uniform4iv(loc, 1, &uniform_int[idx]);
-    }
-
-    // Set the packed uniform float parameters
-    for (int idx = 0; idx != num_padded_uniform_floats; idx += 4) {
-
-        // Produce the uniform parameter name without using the std library.
-        Printer<StringStreamPrinter, 16> name(user_context);
-        name << "_uniformf" << (idx / 4);
-
-        GLint loc = global_state.GetUniformLocation(kernel->program_id, name.str());
-        if (global_state.CheckAndReportError(user_context, "halide_opengl_run GetUniformLocation(2)")) {
-            return 1;
-        }
-        if (loc == -1) {
-            // Argument was probably optimized away by GLSL compiler.
-            continue;
-        }
-
-        global_state.Uniform4fv(loc, 1, &uniform_float[idx]);
-    }
-
-    // Prepare framebuffer for rendering to output textures.
-    GLint output_min[2] = {0, 0};
-    GLint output_extent[2] = {0, 0};
-
-    if (bind_render_targets) {
-        global_state.BindFramebuffer(GL_FRAMEBUFFER, global_state.framebuffer_id);
-    }
-
-    global_state.Disable(GL_CULL_FACE);
-    global_state.Disable(GL_DEPTH_TEST);
-
-    GLint num_output_textures = 0;
-    kernel_arg = kernel->arguments;
-    for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) {
-        if (kernel_arg->kind != Argument::Outbuf) {
-            continue;
-        }
-
-        halide_assert(user_context, is_buffer[i] && "OpenGL Outbuf argument is not a buffer.");
-
-        // TODO: GL_MAX_COLOR_ATTACHMENTS
-        if (num_output_textures >= 1) {
-            error(user_context)
-                << "OpenGL ES 2.0 only supports one single output texture";
-            return 1;
-        }
-
-        halide_buffer_t *buf = (halide_buffer_t *)args[i];
-        halide_assert(user_context, buf->dimensions >= 2);
-        uint64_t handle = buf->device;
-        if (!handle) {
-            error(user_context) << "GLSL: Encountered invalid nullptr dev pointer";
-            return 1;
-        }
-        GLuint tex = (handle == HALIDE_OPENGL_RENDER_TARGET) ? 0 : (GLuint)handle;
-
-        // Check to see if the object name is actually a FBO
-        if (bind_render_targets) {
-            debug(user_context)
-                << "Output texture " << num_output_textures << ": " << tex << "\n";
-            global_state.FramebufferTexture2D(GL_FRAMEBUFFER,
-                                              GL_COLOR_ATTACHMENT0 + num_output_textures,
-                                              GL_TEXTURE_2D, tex, 0);
-            if (global_state.CheckAndReportError(user_context, "halide_opengl_run FramebufferTexture2D")) {
-                return 1;
-            }
-        }
-
-        output_min[0] = buf->dim[0].min;
-        output_min[1] = buf->dim[1].min;
-        output_extent[0] = buf->dim[0].extent;
-        output_extent[1] = buf->dim[1].extent;
-        num_output_textures++;
-    }
-    // TODO: GL_MAX_DRAW_BUFFERS
-    if (num_output_textures == 0) {
-        error(user_context) << "halide_opengl_run: kernel has no output\n";
-        // TODO: cleanup
-        return 1;
-    } else if (num_output_textures > 1) {
-        if (global_state.DrawBuffers) {
-            HalideMalloc draw_buffers_tmp(user_context, num_output_textures * sizeof(GLenum));
-            if (!draw_buffers_tmp.ptr) {
-                error(user_context) << "halide_malloc";
-                return 1;
-            }
-            GLenum *draw_buffers = (GLenum *)draw_buffers_tmp.ptr;
-            for (int i = 0; i < num_output_textures; i++) {
-                draw_buffers[i] = GL_COLOR_ATTACHMENT0 + i;
-            }
-            global_state.DrawBuffers(num_output_textures, draw_buffers);
-            if (global_state.CheckAndReportError(user_context, "halide_opengl_run DrawBuffers")) {
-                return 1;
-            }
-        } else {
-            error(user_context) << "halide_opengl_run: kernel has more than one output and DrawBuffers is not available (earlier than GL ES 3.0?).\n";
-            // TODO: cleanup
-            return 1;
-        }
-    }
-
-    if (bind_render_targets) {
-        // Check that framebuffer is set up correctly
-        GLenum status = global_state.CheckFramebufferStatus(GL_FRAMEBUFFER);
-        if (global_state.CheckAndReportError(user_context, "halide_opengl_run CheckFramebufferStatus")) {
-            return 1;
-        }
-        if (status != GL_FRAMEBUFFER_COMPLETE) {
-            error(user_context)
-                << "Setting up GL framebuffer " << global_state.framebuffer_id
-                << " failed (" << status << ")";
-            // TODO: cleanup
-            return 1;
-        }
-    }
-
-    // Set vertex attributes
-    GLint loc = global_state.GetUniformLocation(kernel->program_id, "output_extent");
-    global_state.Uniform2iv(loc, 1, output_extent);
-    if (global_state.CheckAndReportError(user_context, "halide_opengl_run Uniform2iv(output_extent)")) {
-        return 1;
-    }
-    loc = global_state.GetUniformLocation(kernel->program_id, "output_min");
-    global_state.Uniform2iv(loc, 1, output_min);
-    if (global_state.CheckAndReportError(user_context, "halide_opengl_run Uniform2iv(output_min)")) {
-        return 1;
-    }
-
-#if 0  // DEBUG_RUNTIME
-    debug(user_context) << "output_extent: " << output_extent[0] << "," << output_extent[1] << "\n";
-    debug(user_context) << "output_min: " << output_min[0] << "," << output_min[1] << "\n";
-#endif
-
-    // TODO(abestephensg): Sort coordinate dimensions when the linear solver is integrated
-    // Sort the coordinates
-
-    // Construct an element buffer using the sorted vertex order.
-    // Note that this is "width" and "height" of the vertices, not the output image.
-    int width = num_coords_dim0;
-    int height = num_coords_dim1;
-
-    int vertex_buffer_size = width * height * num_padded_attributes;
-
-    int element_buffer_size = (width - 1) * (height - 1) * 6;
-    int element_buffer[element_buffer_size];
-
-    int idx = 0;
-    for (int h = 0; h != (height - 1); ++h) {
-        for (int w = 0; w != (width - 1); ++w) {
-
-            // TODO(abestephensg): Use sorted coordinates when integrated
-            int v = w + h * width;
-            element_buffer[idx++] = v;
-            element_buffer[idx++] = v + 1;
-            element_buffer[idx++] = v + width + 1;
-
-            element_buffer[idx++] = v + width + 1;
-            element_buffer[idx++] = v + width;
-            element_buffer[idx++] = v;
-        }
-    }
-
-#if 0  // DEBUG_RUNTIME
-    debug(user_context) << "Vertex buffer:";
-    for (int i=0;i!=vertex_buffer_size;++i) {
-        if (!(i%num_padded_attributes)) {
-          debug(user_context) << "\n";
-        }
-        debug(user_context) << vertex_buffer[i] << " ";
-    }
-    debug(user_context) << "\n";
-    debug(user_context) << "\n";
-
-    debug(user_context) << "Element buffer:";
-    for (int i=0;i!=element_buffer_size;++i) {
-        if (!(i%3)) {
-            debug(user_context) << "\n";
-        }
-        debug(user_context) << element_buffer[i] << " ";
-    }
-    debug(user_context) << "\n";
-#endif
-
-    // Setup viewport
-    global_state.Viewport(0, 0, output_extent[0], output_extent[1]);
-
-    // Setup the vertex and element buffers
-    GLuint vertex_array_object = 0;
-    if (global_state.have_vertex_array_objects) {
-        global_state.GenVertexArrays(1, &vertex_array_object);
-        global_state.BindVertexArray(vertex_array_object);
-    }
-
-    GLuint vertex_buffer_id;
-    global_state.GenBuffers(1, &vertex_buffer_id);
-    global_state.BindBuffer(GL_ARRAY_BUFFER, vertex_buffer_id);
-    global_state.BufferData(GL_ARRAY_BUFFER, sizeof(float) * vertex_buffer_size, vertex_buffer, GL_STATIC_DRAW);
-    if (global_state.CheckAndReportError(user_context, "halide_opengl_run vertex BufferData et al")) {
-        return 1;
-    }
-
-    GLuint element_buffer_id;
-    global_state.GenBuffers(1, &element_buffer_id);
-    global_state.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, element_buffer_id);
-    global_state.BufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(float) * element_buffer_size, element_buffer, GL_STATIC_DRAW);
-    if (global_state.CheckAndReportError(user_context, "halide_opengl_run element BufferData et al")) {
-        return 1;
-    }
-
-    // The num_padded_attributes argument is the number of vertex attributes,
-    // including the spatial x and y coordinates, padded up to a multiple of
-    // four so that the attributes may be packed into vec4 slots.
-    int num_packed_attributes = num_padded_attributes / 4;
-
-    // Set up the per vertex attributes
-    GLint attrib_ids[num_packed_attributes];
-
-    for (int i = 0; i != num_packed_attributes; i++) {
-
-        // The attribute names can synthesized by the runtime based on the
-        // number of packed varying attributes
-        Printer<StringStreamPrinter> attribute_name(user_context);
-        attribute_name << "_varyingf" << i << "_attrib";
-
-        // TODO(abstephensg): Switch to glBindAttribLocation
-        GLint attrib_id = global_state.GetAttribLocation(kernel->program_id, attribute_name.buf);
-        attrib_ids[i] = attrib_id;
-
-        // Check to see if the varying attribute was simplified out of the
-        // program by the GLSL compiler.
-        if (attrib_id == -1) {
-            continue;
-        }
-
-        global_state.VertexAttribPointer(attrib_id, 4, GL_FLOAT, GL_FALSE /* Normalized */, sizeof(GLfloat) * num_padded_attributes, (void *)(i * sizeof(GLfloat) * 4));
-        if (global_state.CheckAndReportError(user_context, "halide_opengl_run VertexAttribPointer et al")) {
-            return 1;
-        }
-
-        global_state.EnableVertexAttribArray(attrib_id);
-        if (global_state.CheckAndReportError(user_context, "halide_opengl_run EnableVertexAttribArray et al")) {
-            return 1;
-        }
-    }
-
-    // Draw the scene
-    global_state.DrawElements(GL_TRIANGLES, element_buffer_size, GL_UNSIGNED_INT, nullptr);
-    if (global_state.CheckAndReportError(user_context, "halide_opengl_run DrawElements et al")) {
-        return 1;
-    }
-
-    // Cleanup
-    if (global_state.have_vertex_array_objects) {
-        global_state.DeleteVertexArrays(1, &vertex_array_object);
-    }
-
-    global_state.DeleteBuffers(1, &vertex_buffer_id);
-    global_state.DeleteBuffers(1, &element_buffer_id);
-
-    return 0;
-}
-
-WEAK int halide_opengl_device_sync(void *user_context, struct halide_buffer_t *) {
-    if (!global_state.initialized) {
-        error(user_context) << "OpenGL runtime not initialized (halide_opengl_device_sync).";
-        return 1;
-    }
-#ifdef DEBUG_RUNTIME
-    int64_t t0 = halide_current_time_ns(user_context);
-#endif
-    global_state.Finish();
-#ifdef DEBUG_RUNTIME
-    int64_t t1 = halide_current_time_ns(user_context);
-    debug(user_context) << "halide_opengl_device_sync: took " << (t1 - t0) / 1e3 << "usec\n";
-#endif
-    return 0;
-}
-
-// Called at the beginning of a code block generated by Halide. This function
-// is responsible for setting up the OpenGL environment and compiling the GLSL
-// code into a fragment shader.
-WEAK int halide_opengl_initialize_kernels(void *user_context, void **state_ptr,
-                                          const char *src, int size) {
-    debug(user_context) << "In initialize_kernels\n";
-
-    if (int error = halide_opengl_init(user_context)) {
-        return error;
-    }
-
-    const char *this_kernel = src;
-
-    ModuleState **state = (ModuleState **)state_ptr;
-    ModuleState *module = *state;
-
-    while (this_kernel) {
-        // Find the start of the next kernel
-        const char *next_kernel = strstr(this_kernel + 1, kernel_marker);
-
-        // Use that to compute the length of this kernel
-        int len = 0;
-        if (!next_kernel) {
-            len = strlen(this_kernel);
-        } else {
-            len = next_kernel - this_kernel;
-        }
-
-        // Construct a new ModuleState and add it to the global list
-        module = (ModuleState *)malloc(sizeof(ModuleState));
-        module->kernel = nullptr;
-        module->next = state_list;
-        state_list = module;
-        *state = module;
-
-        KernelInfo *kernel = module->kernel;
-        if (!kernel) {
-            kernel = create_kernel(user_context, this_kernel, len);
-            if (!kernel) {
-                error(user_context) << "Invalid kernel: " << this_kernel;
-                return -1;
-            }
-            module->kernel = kernel;
-        }
-
-        // Create the vertex shader. The runtime will output boilerplate for the
-        // vertex shader based on a fixed program plus arguments obtained from
-        // the comment header passed in the fragment shader. Since there are a
-        // relatively small number of vertices (i.e. usually only four), per-vertex
-        // expressions interpolated by varying attributes are evaluated
-        // by host code on the CPU and passed to the GPU as values in the
-        // vertex buffer.
-        enum { PrinterLength = 1024 * 4 };
-        Printer<StringStreamPrinter, PrinterLength> vertex_src(user_context);
-
-        // Count the number of varying attributes, this is 2 for the spatial
-        // x and y coordinates, plus the number of scalar varying attribute
-        // expressions pulled out of the fragment shader.
-        int num_varying_float = 2;
-
-        for (Argument *arg = kernel->arguments; arg; arg = arg->next) {
-            if (arg->kind == Argument::Varying) {
-                ++num_varying_float;
-            }
-        }
-
-        int num_packed_varying_float = ((num_varying_float + 3) & ~0x3) / 4;
-
-        for (int i = 0; i != num_packed_varying_float; ++i) {
-            vertex_src << "attribute vec4 _varyingf" << i << "_attrib;\n";
-            vertex_src << "varying   vec4 _varyingf" << i << ";\n";
-        }
-
-        vertex_src << "uniform ivec2 output_min;\n"
-                   << "uniform ivec2 output_extent;\n"
-                   << "void main() {\n"
-
-                   // Host codegen always passes the spatial vertex coordinates
-                   // in the first two elements of the _varyingf0_attrib
-                   << "    vec2 position = vec2(_varyingf0_attrib[0], _varyingf0_attrib[1]);\n"
-                   << "    gl_Position = vec4(position, 0.0, 1.0);\n"
-                   << "    vec2 texcoord = 0.5 * position + 0.5;\n"
-                   << "    vec2 pixcoord = texcoord * vec2(output_extent.xy) + vec2(output_min.xy);\n";
-
-        // Copy through all of the varying attributes
-        for (int i = 0; i != num_packed_varying_float; ++i) {
-            vertex_src << "    _varyingf" << i << " = _varyingf" << i << "_attrib;\n";
-        }
-
-        vertex_src << "    _varyingf0.xy = pixcoord;\n";
-
-        vertex_src << "}\n";
-
-        // Check to see if there was sufficient storage for the vertex program.
-        if (vertex_src.size() >= PrinterLength) {
-            error(user_context) << "Vertex shader source truncated";
-            return 1;
-        }
-
-        // Initialize vertex shader.
-        GLuint vertex_shader_id = make_shader(user_context,
-                                              GL_VERTEX_SHADER, vertex_src.buf, nullptr);
-        if (vertex_shader_id == 0) {
-            halide_error(user_context, "Failed to create vertex shader");
-            return 1;
-        }
-
-        // Create the fragment shader
-        GLuint fragment_shader_id = make_shader(user_context, GL_FRAGMENT_SHADER,
-                                                kernel->source, nullptr);
-        // Link GLSL program
-        GLuint program = global_state.CreateProgram();
-        global_state.AttachShader(program, vertex_shader_id);
-        global_state.AttachShader(program, fragment_shader_id);
-        global_state.LinkProgram(program);
-
-        // Release the individual shaders
-        global_state.DeleteShader(vertex_shader_id);
-        global_state.DeleteShader(fragment_shader_id);
-
-        GLint status;
-        global_state.GetProgramiv(program, GL_LINK_STATUS, &status);
-        if (!status) {
-            GLint log_len;
-            global_state.GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_len);
-            HalideMalloc log_tmp(user_context, log_len);
-            if (log_tmp.ptr) {
-                char *log = (char *)log_tmp.ptr;
-                global_state.GetProgramInfoLog(program, log_len, nullptr, log);
-                debug(user_context) << "Could not link GLSL program:\n"
-                                    << log << "\n";
-            }
-            global_state.DeleteProgram(program);
-            return -1;
-        }
-        kernel->program_id = program;
-
-        this_kernel = next_kernel;
-    }
-    return 0;
-}
-
-WEAK int halide_opengl_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf) {
-    return halide_default_device_and_host_malloc(user_context, buf, &opengl_device_interface);
-}
-
-WEAK int halide_opengl_device_and_host_free(void *user_context, struct halide_buffer_t *buf) {
-    return halide_default_device_and_host_free(user_context, buf, &opengl_device_interface);
-}
-
-WEAK const halide_device_interface_t *halide_opengl_device_interface() {
-    return &opengl_device_interface;
-}
-
-WEAK void halide_opengl_context_lost(void *user_context) {
-    if (!global_state.initialized) {
-        return;
-    }
-
-    debug(user_context) << "halide_opengl_context_lost\n";
-    for (ModuleState *mod = state_list; mod; mod = mod->next) {
-        // Reset program handle to force recompilation.
-        mod->kernel->program_id = 0;
-    }
-
-    global_state.init();
-}
-
-WEAK int halide_opengl_wrap_texture(void *user_context, halide_buffer_t *buf, uint64_t texture_id) {
-    if (!global_state.initialized) {
-        if (int error = halide_opengl_init(user_context)) {
-            return error;
-        }
-    }
-    if (texture_id == 0) {
-        error(user_context) << "Texture " << texture_id << " is not a valid texture name.";
-        return -3;
-    }
-    halide_assert(user_context, buf->device == 0);
-    if (buf->device != 0) {
-        return -2;
-    }
-    buf->device = texture_id;
-    buf->device_interface = &opengl_device_interface;
-    buf->device_interface->impl->use_module();
-    return 0;
-}
-
-WEAK int halide_opengl_wrap_render_target(void *user_context, halide_buffer_t *buf) {
-    if (!global_state.initialized) {
-        if (int error = halide_opengl_init(user_context)) {
-            return error;
-        }
-    }
-    halide_assert(user_context, buf->device == 0);
-    if (buf->device != 0) {
-        return -2;
-    }
-    buf->device = HALIDE_OPENGL_RENDER_TARGET;
-    buf->device_interface = &opengl_device_interface;
-    buf->device_interface->impl->use_module();
-    return 0;
-}
-
-WEAK int halide_opengl_detach_texture(void *user_context, halide_buffer_t *buf) {
-    if (buf->device == 0) {
-        return 0;
-    }
-
-    halide_assert(user_context, buf->device_interface == &opengl_device_interface);
-    buf->device = 0;
-    buf->device_interface->impl->release_module();
-    buf->device_interface = nullptr;
-    return 0;
-}
-
-WEAK uintptr_t halide_opengl_get_texture(void *user_context, halide_buffer_t *buf) {
-    if (buf->device == 0) {
-        return 0;
-    }
-    halide_assert(user_context, buf->device_interface == &opengl_device_interface);
-    uint64_t handle = buf->device;
-    // client_bound always return 0 here.
-    return handle == HALIDE_OPENGL_RENDER_TARGET ? 0 : (uintptr_t)handle;
-}
-
-namespace {
-WEAK __attribute__((destructor)) void halide_opengl_cleanup() {
-    halide_opengl_device_release(nullptr);
-}
-}  // namespace
-
-}  // extern "C"
-
-namespace Halide {
-namespace Runtime {
-namespace Internal {
-namespace OpenGL {
-
-WEAK halide_device_interface_impl_t opengl_device_interface_impl = {
-    halide_use_jit_module,
-    halide_release_jit_module,
-    halide_opengl_device_malloc,
-    halide_opengl_device_free,
-    halide_opengl_device_sync,
-    halide_opengl_device_release,
-    halide_opengl_copy_to_host,
-    halide_opengl_copy_to_device,
-    halide_opengl_device_and_host_malloc,
-    halide_opengl_device_and_host_free,
-    halide_default_buffer_copy,
-    halide_default_device_crop,
-    halide_default_device_slice,
-    halide_default_device_release_crop,
-    halide_opengl_wrap_texture,
-    halide_opengl_detach_texture};
-
-WEAK halide_device_interface_t opengl_device_interface = {
-    halide_device_malloc,
-    halide_device_free,
-    halide_device_sync,
-    halide_device_release,
-    halide_copy_to_host,
-    halide_copy_to_device,
-    halide_device_and_host_malloc,
-    halide_device_and_host_free,
-    halide_buffer_copy,
-    halide_device_crop,
-    halide_device_slice,
-    halide_device_release_crop,
-    halide_device_wrap_native,
-    halide_device_detach_native,
-    nullptr,
-    &opengl_device_interface_impl};
-
-}  // namespace OpenGL
-}  // namespace Internal
-}  // namespace Runtime
-}  // namespace Halide
diff --git a/src/runtime/runtime_api.cpp b/src/runtime/runtime_api.cpp
index 230c907721d0..7b38a15caefe 100644
--- a/src/runtime/runtime_api.cpp
+++ b/src/runtime/runtime_api.cpp
@@ -7,7 +7,6 @@
 #include "HalideRuntimeHexagonHost.h"
 #include "HalideRuntimeMetal.h"
 #include "HalideRuntimeOpenCL.h"
-#include "HalideRuntimeOpenGL.h"
 #include "HalideRuntimeOpenGLCompute.h"
 #include "HalideRuntimeQurt.h"
 #include "cpu_features.h"
@@ -148,16 +147,8 @@ extern "C" __attribute__((used)) void *halide_runtime_api_functions[] = {
     (void *)&halide_opencl_set_device_type,
     (void *)&halide_opencl_set_platform_name,
     (void *)&halide_opencl_wrap_cl_mem,
-    (void *)&halide_opengl_context_lost,
     (void *)&halide_opengl_create_context,
-    (void *)&halide_opengl_detach_texture,
-    (void *)&halide_opengl_device_interface,
     (void *)&halide_opengl_get_proc_address,
-    (void *)&halide_opengl_get_texture,
-    (void *)&halide_opengl_initialize_kernels,
-    (void *)&halide_opengl_run,
-    (void *)&halide_opengl_wrap_render_target,
-    (void *)&halide_opengl_wrap_texture,
     (void *)&halide_openglcompute_device_interface,
     (void *)&halide_openglcompute_initialize_kernels,
     (void *)&halide_openglcompute_run,
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index dee66bc4dd21..c5147af5adc7 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -34,11 +34,6 @@ if (WITH_TEST_PERFORMANCE)
     add_subdirectory(performance)
 endif ()
 
-option(WITH_TEST_OPENGL "Build OpenGL tests" OFF)
-if (WITH_TEST_OPENGL)
-    add_subdirectory(opengl)
-endif ()
-
 option(WITH_TEST_GENERATOR "Build generator tests" ON)
 if (WITH_TEST_GENERATOR)
     add_subdirectory(generator)
diff --git a/test/correctness/device_buffer_copy.cpp b/test/correctness/device_buffer_copy.cpp
index cff35c8a11f1..c83a079eb151 100644
--- a/test/correctness/device_buffer_copy.cpp
+++ b/test/correctness/device_buffer_copy.cpp
@@ -214,8 +214,7 @@ int main(int argc, char **argv) {
     // Test copying between different device APIs. Probably will not
     // run on test infrastructure as we do not configure more than one
     // GPU API at a time. For now, special case CUDA and OpenCL as these are
-    // the most likely to be supported together. (OpenGL would be a candidate
-    // but buffer_copy support needs to be added.)
+    // the most likely to be supported together.
     if (target.has_feature(Target::CUDA) && target.has_feature(Target::OpenCL)) {
         printf("Test cross device copy device to device.\n");
         {
diff --git a/test/correctness/gpu_multi_device.cpp b/test/correctness/gpu_multi_device.cpp
index b92a1b37ae22..b9a872f33af2 100644
--- a/test/correctness/gpu_multi_device.cpp
+++ b/test/correctness/gpu_multi_device.cpp
@@ -39,16 +39,6 @@ struct MultiDevicePipeline {
                 .gpu_tile(x, y, xi, yi, 8, 8, TailStrategy::Auto, DeviceAPI::Metal);
             current_stage++;
         }
-        if (jit_target.has_feature(Target::OpenGL)) {
-            stage[current_stage](x, y, c) = stage[current_stage - 1](x, y, c) + 69;
-            stage[current_stage]
-                .compute_root()
-                .bound(c, 0, 3)
-                .reorder(c, x, y)
-                .glsl(x, y, c)
-                .vectorize(c);
-            current_stage++;
-        }
         if (jit_target.has_feature(Target::OpenGLCompute)) {
             stage[current_stage](x, y, c) = stage[current_stage - 1](x, y, c) + 69;
             stage[current_stage]
diff --git a/test/correctness/plain_c_includes.c b/test/correctness/plain_c_includes.c
index 18529c77a8fb..65a436014cbd 100644
--- a/test/correctness/plain_c_includes.c
+++ b/test/correctness/plain_c_includes.c
@@ -10,7 +10,6 @@
 #include "HalideRuntimeHexagonHost.h"
 #include "HalideRuntimeMetal.h"
 #include "HalideRuntimeOpenCL.h"
-#include "HalideRuntimeOpenGL.h"
 #include "HalideRuntimeOpenGLCompute.h"
 #include "HalideRuntimeQurt.h"
 
diff --git a/test/correctness/target.cpp b/test/correctness/target.cpp
index 64060606d0e5..7c575c5233ee 100644
--- a/test/correctness/target.cpp
+++ b/test/correctness/target.cpp
@@ -52,10 +52,10 @@ int main(int argc, char **argv) {
     // Full specification round-trip, crazy features
     t1 = Target(Target::Android, Target::ARM, 32,
                 {Target::JIT, Target::SSE41, Target::AVX, Target::AVX2,
-                 Target::CUDA, Target::OpenCL, Target::OpenGL, Target::OpenGLCompute,
+                 Target::CUDA, Target::OpenCL, Target::OpenGLCompute,
                  Target::Debug});
     ts = t1.to_string();
-    if (ts != "arm-32-android-avx-avx2-cuda-debug-jit-opencl-opengl-openglcompute-sse41") {
+    if (ts != "arm-32-android-avx-avx2-cuda-debug-jit-opencl-openglcompute-sse41") {
         printf("to_string failure: %s\n", ts.c_str());
         return -1;
     }
diff --git a/test/opengl/CMakeLists.txt b/test/opengl/CMakeLists.txt
deleted file mode 100644
index b38c20a3a36a..000000000000
--- a/test/opengl/CMakeLists.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-if (TARGET OpenGL::GL)
-    tests(GROUPS opengl
-          SOURCES
-          conv_select.cpp
-          copy_pixels.cpp
-          copy_to_device.cpp
-          copy_to_host.cpp
-          float_texture.cpp
-          inline_reduction.cpp
-          internal.cpp
-          lut.cpp
-          multiple_stages.cpp
-          produce.cpp
-          rewrap_texture.cpp
-          save_state.cpp
-          select.cpp
-          set_pixels.cpp
-          shifted_domains.cpp
-          special_funcs.cpp
-          sumcolor_reduction.cpp
-          sum_reduction.cpp
-          tuples.cpp
-          varying.cpp
-          )
-    foreach (test_name IN LISTS TEST_NAMES)
-        target_link_libraries("${test_name}" PRIVATE OpenGL::GL)
-    endforeach ()
-endif ()
diff --git a/test/opengl/conv_select.cpp b/test/opengl/conv_select.cpp
deleted file mode 100644
index 735c752d06f0..000000000000
--- a/test/opengl/conv_select.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-// test case provided by Lee Yuguang
-
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-int main() {
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    // Define the input
-    const int width = 10, height = 10, channels = 4, res_channels = 2;
-    Buffer<float> input(width, height, channels);
-    input.fill([](int x, int y, int c) {
-        return float(x + y);
-    });
-
-    // Define the algorithm.
-    Var x, y, c;
-    RDom r(0, 2, "r");
-    Func f, g;
-
-    Expr coordx = clamp(x + r, 0, input.width() - 1);
-    f(x, y, c) = cast<float>(sum(input(coordx, y, c)));
-
-    Expr R = select(f(x, y, c) > 9.0f, 1.0f, 0.0f);
-    Expr G = select(f(x, y, c) > 9.0f, 0.f, 1.0f);
-    g(x, y, c) = mux(c, {R, G});
-
-    // Schedule f and g to compute in separate passes on the GPU.
-    g.bound(c, 0, 2).glsl(x, y, c);
-
-    // Generate the result.
-    Buffer<float> result = g.realize(width, height, res_channels, target);
-    result.copy_to_host();
-
-    //Check the result.
-    if (!Testing::check_result<float>(result, [](int x, int y, int c) {
-            const float temp = ((x + y) > 4) ? 1.0f : 0.0f;
-            return (c == 0) ? temp : (1.0f - temp);
-        })) {
-        return 1;
-    }
-
-    printf("Success!\n");
-    return 0;
-}
diff --git a/test/opengl/copy_pixels.cpp b/test/opengl/copy_pixels.cpp
deleted file mode 100644
index 97cacecd32e1..000000000000
--- a/test/opengl/copy_pixels.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-int main() {
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Buffer<uint8_t> input(255, 10, 3);
-    input.fill([](int x, int y, int c) {
-        return 10 * x + y + c;
-    });
-
-    Var x, y, c;
-    Func g;
-    g(x, y, c) = input(x, y, c);
-
-    Buffer<uint8_t> out(255, 10, 3);
-    g.bound(c, 0, 3);
-    g.glsl(x, y, c);
-    g.realize(out, target);
-    out.copy_to_host();
-
-    if (!Testing::check_result<uint8_t>(out, [&](int x, int y, int c) { return input(x, y, c); })) {
-        return 1;
-    }
-
-    printf("Success!\n");
-    return 0;
-}
diff --git a/test/opengl/copy_to_device.cpp b/test/opengl/copy_to_device.cpp
deleted file mode 100644
index 0feedf5895c8..000000000000
--- a/test/opengl/copy_to_device.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-#include "Halide.h"
-
-#include "testing.h"
-
-using namespace Halide;
-
-// Test that internal allocations work correctly with copy_to_device.
-// This requires that suitable halide_buffer_t objects are created internally.
-int main() {
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Buffer<uint8_t> input(255, 10, 3);
-    input.fill([](int x, int y, int c) {
-        return 10 * x + y + c;
-    });
-
-    Var x, y, c;
-    Func g, h;
-    h(x, y, c) = input(x, y, c);
-    h.compute_root();  // force internal allocation of h
-
-    // access h from shader to trigger copy_to_device operation
-    g(x, y, c) = h(x, y, c);
-    g.bound(c, 0, 3);
-    g.glsl(x, y, c);
-
-    Buffer<uint8_t> out(255, 10, 3);
-    g.realize(out, target);
-    out.copy_to_host();
-
-    if (!Testing::check_result<uint8_t>(out, [&](int x, int y, int c) { return input(x, y, c); })) {
-        return 1;
-    }
-
-    printf("Success!\n");
-    return 0;
-}
diff --git a/test/opengl/copy_to_host.cpp b/test/opengl/copy_to_host.cpp
deleted file mode 100644
index c03759065a9d..000000000000
--- a/test/opengl/copy_to_host.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-int main() {
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Func gpu("gpu"), cpu("cpu");
-    Var x, y, c;
-
-    // Fill buffer using GLSL
-    gpu(x, y, c) = cast<uint8_t>(mux(c, {10 * x + y, 127, 12}));
-    gpu.bound(c, 0, 3);
-    gpu.glsl(x, y, c);
-    gpu.compute_root();
-
-    // This should trigger a copy_to_host operation
-    cpu(x, y, c) = gpu(x, y, c);
-
-    Buffer<uint8_t> out(10, 10, 3);
-    cpu.realize(out, target);
-
-    if (!Testing::check_result<uint8_t>(out, [&](int x, int y, int c) {
-            switch (c) {
-                case 0: return 10*x+y;
-                case 1: return 127;
-                case 2: return 12;
-                default: return -1;
-            } })) {
-        return 1;
-    }
-
-    printf("Success!\n");
-    return 0;
-}
diff --git a/test/opengl/float_texture.cpp b/test/opengl/float_texture.cpp
deleted file mode 100644
index 166863d559ea..000000000000
--- a/test/opengl/float_texture.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-int main() {
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Buffer<float> input(255, 255, 3);
-    input.fill([](int x, int y, int c) {
-        // Note: the following values can be >1.0f to test whether
-        // OpenGL performs clamping operations as part of the copy
-        // operation.  (It may do so if something other than floats
-        // are stored in the actual texture.)
-        return (10 * x + y + c);
-    });
-
-    Var x, y, c;
-    Func g;
-    g(x, y, c) = input(x, y, c);
-
-    Buffer<float> out(255, 255, 3);
-    g.bound(c, 0, 3);
-    g.glsl(x, y, c);
-    g.realize(out, target);
-    out.copy_to_host();
-
-    if (!Testing::check_result<float>(out, [&](int x, int y, int c) { return input(x, y, c); })) {
-        return 1;
-    }
-
-    printf("Success!\n");
-    return 0;
-}
diff --git a/test/opengl/inline_reduction.cpp b/test/opengl/inline_reduction.cpp
deleted file mode 100644
index 6630145e284f..000000000000
--- a/test/opengl/inline_reduction.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-int main() {
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Func f;
-    Var x, y, c;
-    RDom r(0, 10);
-    f(x, y, c) = sum(cast<float>(r));
-    f.bound(c, 0, 3).glsl(x, y, c);
-
-    Buffer<float> result = f.realize(100, 100, 3, target);
-
-    if (!Testing::check_result<float>(result, [&](int x, int y, int c) { return 45; })) {
-        return 1;
-    }
-
-    printf("Success!\n");
-
-    return 0;
-}
diff --git a/test/opengl/internal.cpp b/test/opengl/internal.cpp
deleted file mode 100644
index e1ce9c34ed5b..000000000000
--- a/test/opengl/internal.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "../../src/CodeGen_OpenGL_Dev.h"
-
-using namespace Halide;
-using namespace Halide::Internal;
-
-int main() {
-    CodeGen_GLSL::test();
-
-    return 0;
-}
diff --git a/test/opengl/lut.cpp b/test/opengl/lut.cpp
deleted file mode 100644
index d51f7f1f8bf6..000000000000
--- a/test/opengl/lut.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-// This test creates two input images and uses one to perform a dependent lookup
-// into the other.
-
-int test_lut1d() {
-
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Var x("x");
-    Var y("y");
-    Var c("c");
-
-    Buffer<uint8_t> input(8, 8, 3);
-    input.fill([](int x, int y, int c) {
-        const float v = (1.0f / 16.0f) + (float)x / 8.0f;
-        switch (c) {
-        case 0:
-            return (uint8_t)(v * 255.0f);
-        case 1:
-            return (uint8_t)((1.0f - v) * 255.0f);
-        default:
-            return (uint8_t)((v > 0.5 ? 1.0 : 0.0) * 255.0f);
-        }
-    });
-
-    // 1D Look Up Table case
-    Buffer<float> lut1d(8, 1, 3);
-    for (int c = 0; c != 3; ++c) {
-        for (int i = 0; i != 8; ++i) {
-            lut1d(i, 0, c) = (float)(1 + i);
-        }
-    }
-
-    Func f0("f");
-    Expr e = cast<int>(8.0f * cast<float>(input(x, y, c)) / 255.0f);
-
-    f0(x, y, c) = lut1d(clamp(e, 0, 7), 0, c);
-
-    Buffer<float> out0(8, 8, 3);
-
-    f0.bound(c, 0, 3);
-    f0.glsl(x, y, c);
-    f0.realize(out0, target);
-    out0.copy_to_host();
-
-    if (!Testing::check_result<float>(out0, [](int x, int y, int c) {
-            switch (c) {
-                case 0: return  (float)(1 + x);
-                case 1: return (float)(8 - x);
-                case 2: return (x > 3) ? 8.0f : 1.0f;
-                default: return std::numeric_limits<float>::infinity();
-            } })) {
-        return 1;
-    }
-
-    return 0;
-}
-
-int main() {
-
-    if (test_lut1d() == 0) {
-        printf("Success!\n");
-    } else {
-        printf("FAILED\n");
-    }
-
-    return 0;
-}
diff --git a/test/opengl/multiple_stages.cpp b/test/opengl/multiple_stages.cpp
deleted file mode 100644
index f5bac6b8b197..000000000000
--- a/test/opengl/multiple_stages.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-int main() {
-
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Func f, g, h;
-    Var x, y, c;
-    g(x, y, c) = cast<uint8_t>(x);
-    h(x, y, c) = 1 + g(x, y, c);
-    f(x, y, c) = h(x, y, c) + cast<uint8_t>(y);
-    f.bound(c, 0, 3).glsl(x, y, c);
-    h.bound(c, 0, 3).compute_root();
-    g.bound(c, 0, 3).compute_root().glsl(x, y, c);
-
-    Buffer<uint8_t> result = f.realize(10, 10, 3, target);
-    result.copy_to_host();
-
-    if (!Testing::check_result<uint8_t>(result, [&](int i, int j, int k) { return i + j + 1; })) {
-        return 1;
-    }
-
-    Func f2, g2;
-    f2(x, y, c) = cast<float>(x);
-    g2(x, y, c) = f2(x, y, c) + cast<float>(y);
-
-    f2.bound(c, 0, 3).glsl(x, y, c).compute_root();
-    g2.bound(c, 0, 3).glsl(x, y, c);
-
-    Buffer<float> result2 = g2.realize(10, 10, 3, target);
-    if (!Testing::check_result<float>(result2, 0.01f, [&](int i, int j, int k) { return (float)(i + j); })) {
-        return 1;
-    }
-
-    printf("Success!\n");
-
-    return 0;
-}
diff --git a/test/opengl/produce.cpp b/test/opengl/produce.cpp
deleted file mode 100644
index 002f9ec89045..000000000000
--- a/test/opengl/produce.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-// This test creates two input images and uses one to perform a dependent lookup
-// into the other. The lookup table is produced using a Halide func scheduled
-// on the host.
-
-int test_lut1d() {
-
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Var x("x");
-    Var y("y");
-    Var c("c");
-
-    Buffer<uint8_t> input(8, 8, 3);
-    input.fill([](int x, int y, int c) {
-            float v = (1.0f / 16.0f) + (float)x / 8.0f;
-	    switch (c) {
-	    case 0: return (uint8_t)(v * 255.0f);
-	    case 1: return (uint8_t)((1.0f - v) * 255.0f);
-	    default: return (uint8_t)((v > 0.5 ? 1.0 : 0.0) * 255.0f);
-        } });
-
-    // 1D Look Up Table case
-    Func lut1d("lut1d");
-    lut1d(x) = cast<float>(1 + x);
-
-    Func f0("f");
-    Expr e = cast<int>(8.0f * cast<float>(input(x, y, c)) / 255.0f);
-
-    f0(x, y, c) = lut1d(clamp(e, 0, 7));
-    lut1d.compute_root();
-
-    f0.bound(c, 0, 3);
-    f0.glsl(x, y, c);
-
-    Buffer<float> out0(8, 8, 3);
-    f0.realize(out0, target);
-
-    out0.copy_to_host();
-
-    if (!Testing::check_result<float>(out0, [](int x, int y, int c) {
-	    switch (c) {
-                case 0: return (float)(1 + x);
-                case 1: return (float)(8 - x);
-                case 2: return (x > 3) ? 8.0f : 1.0f;
-		default: return -1.0f;
-	    } })) {
-        return 1;
-    }
-
-    return 0;
-}
-
-int main() {
-
-    if (test_lut1d() == 0) {
-        printf("Success!\n");
-    } else {
-        printf("FAILED\n");
-    }
-
-    return 0;
-}
diff --git a/test/opengl/rewrap_texture.cpp b/test/opengl/rewrap_texture.cpp
deleted file mode 100644
index f19842338670..000000000000
--- a/test/opengl/rewrap_texture.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-#ifdef _WIN32
-#include <stdio.h>
-int main() {
-    printf("[SKIP] OpenGL on Windows is broken.\n");
-    return 0;
-}
-#else
-
-#include "Halide.h"
-
-#include <cstdio>
-
-#if __APPLE__
-// TODO: why are these deprecated? Can we update this test?
-#define GL_SILENCE_DEPRECATION
-#include <OpenGL/gl3.h>
-#else
-#include <GL/gl.h>
-#endif
-
-using namespace Halide;
-
-int main() {
-
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    const int width = 255;
-    const int height = 10;
-
-    Buffer<uint8_t> input(width, height, 3);
-    Buffer<uint8_t> out1(width, height, 3);
-    Buffer<uint8_t> out2(width, height, 3);
-    Buffer<uint8_t> out3(width, height, 3);
-
-    Var x, y, c;
-    Func g;
-    g(x, y, c) = input(x, y, c);
-    g.bound(c, 0, 3);
-    g.glsl(x, y, c);
-
-    g.realize(out1, target);  // run once to initialize OpenGL
-
-    GLuint texture_id;
-    glGenTextures(1, &texture_id);
-    glBindTexture(GL_TEXTURE_2D, texture_id);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
-
-    // wrapping a texture should work
-    out2.device_wrap_native(DeviceAPI::GLSL, texture_id, target);
-    g.realize(out2, target);
-    out2.device_detach_native();
-
-    // re-wrapping the texture should not abort
-    out3.device_wrap_native(DeviceAPI::GLSL, texture_id, target);
-    g.realize(out3, target);
-    out3.device_detach_native();
-
-    printf("Success!\n");
-    return 0;
-}
-
-#endif
diff --git a/test/opengl/save_state.cpp b/test/opengl/save_state.cpp
deleted file mode 100644
index 574565775728..000000000000
--- a/test/opengl/save_state.cpp
+++ /dev/null
@@ -1,314 +0,0 @@
-#ifdef _WIN32
-#include <stdio.h>
-int main() {
-    printf("[SKIP] OpenGL on Windows is broken.\n");
-    return 0;
-}
-#else
-
-#include "Halide.h"
-
-#include <cstddef>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-
-#if __APPLE__
-// TODO: why are these deprecated? Can we update this test?
-#define GL_SILENCE_DEPRECATION
-#include <OpenGL/gl3.h>
-#else
-#define GL_GLEXT_PROTOTYPES
-#include <GL/gl.h>
-#endif
-
-// Generates an arbitrary program.
-class Program {
-public:
-    static GLuint handle() {
-        const char *vertexShader = " \
-                                    attribute vec4 Position;  \
-                                    attribute vec2 TexCoordIn; \
-                                    varying vec2 TexCoordOut; \
-                                    void main(void) {  \
-                                        gl_Position = Position; \
-                                        TexCoordOut = TexCoordIn; \
-                                    }";
-
-        const char *fragmentShader = " \
-                                      varying vec2 TexCoordOut; \
-                                      uniform sampler2D Texture; \
-                                      void main(void) { \
-                                          gl_FragColor = texture2D(Texture, TexCoordOut); \
-                                      }";
-
-        GLuint handle = glCreateProgram();
-        glAttachShader(handle, compileShader("vertex", vertexShader, GL_VERTEX_SHADER));
-        glAttachShader(handle, compileShader("fragment", fragmentShader, GL_FRAGMENT_SHADER));
-        glLinkProgram(handle);
-
-        GLint linkSuccess;
-        glGetProgramiv(handle, GL_LINK_STATUS, &linkSuccess);
-        if (linkSuccess == GL_FALSE) {
-            GLchar messages[256];
-            glGetProgramInfoLog(handle, sizeof(messages), 0, messages);
-            fprintf(stderr, "Error linking program: %s\n", messages);
-            exit(1);
-        }
-
-        return handle;
-    }
-
-private:
-    static GLuint compileShader(const char *label, const char *shaderString, GLenum shaderType) {
-        const GLuint handle = glCreateShader(shaderType);
-        const int len = strlen(shaderString);
-        glShaderSource(handle, 1, &shaderString, &len);
-        glCompileShader(handle);
-        GLint compileSuccess;
-        glGetShaderiv(handle, GL_COMPILE_STATUS, &compileSuccess);
-        if (compileSuccess == GL_FALSE) {
-            GLchar messages[256];
-            glGetShaderInfoLog(handle, sizeof(messages), 0, messages);
-            fprintf(stderr, "Error compiling %s shader: %s\n", label, messages);
-            exit(1);
-        }
-        return handle;
-    }
-};
-
-// Encapsulates setting OpenGL's state to arbitrary values, and checking
-// whether the state matches those values.
-class KnownState {
-private:
-    void gl_enable(GLenum cap, bool state) {
-        (state ? glEnable : glDisable)(cap);
-    }
-
-    GLuint gl_gen(void (*fn)(GLsizei, GLuint *)) {
-        GLuint val;
-        (*fn)(1, &val);
-        return val;
-    }
-
-    void check_value(const char *operation, const char *label, GLenum pname, GLint initial) {
-        GLint val;
-        glGetIntegerv(pname, &val);
-        if (val != initial) {
-            fprintf(stderr, "%s did not restore %s: initial value was %d (%#x), current value is %d (%#x)\n", operation, label, initial, initial, val, val);
-            errors = true;
-        }
-    }
-
-    void check_value(const char *operation, const char *label, GLenum pname, GLenum initial) {
-        check_value(operation, label, pname, (GLint)initial);
-    }
-
-    void check_value(const char *operation, const char *label, GLenum pname, GLint initial[], int n = 4) {
-        GLint val[2048];
-        glGetIntegerv(pname, val);
-        for (int i = 0; i < n; i++) {
-            if (val[i] != initial[i]) {
-                fprintf(stderr, "%s did not restore %s: initial value was", operation, label);
-                for (int j = 0; j < n; j++) {
-                    fprintf(stderr, " %d", initial[j]);
-                }
-                fprintf(stderr, ", current value is");
-                for (int j = 0; j < n; j++) {
-                    fprintf(stderr, " %d", val[j]);
-                }
-                fprintf(stderr, "\n");
-                errors = true;
-                return;
-            }
-        }
-    }
-
-    void check_value(const char *operation, const char *label, GLenum pname, bool initial) {
-        GLboolean val;
-        glGetBooleanv(pname, &val);
-        if (val != initial) {
-            fprintf(stderr, "%s did not restore boolean %s: initial value was %s, current value is %s\n", operation, label, initial ? "true" : "false", val ? "true" : "false");
-            errors = true;
-        }
-    }
-
-    void check_error(const char *label) {
-        GLenum err = glGetError();
-        if (err != GL_NO_ERROR) {
-            fprintf(stderr, "Error setting %s: OpenGL error %#x\n", label, err);
-            errors = true;
-        }
-    }
-
-    // version of OpenGL
-    int gl_major_version;
-    int gl_minor_version;
-
-    GLenum initial_active_texture;
-    GLint initial_viewport[4];
-    GLuint initial_array_buffer_binding;
-    GLuint initial_element_array_buffer_binding;
-    GLuint initial_current_program;
-    GLuint initial_framebuffer_binding;
-    static const int ntextures = 10;
-    GLuint initial_bound_textures[ntextures];
-    bool initial_cull_face;
-    bool initial_depth_test;
-
-    static const int nvertex_attribs = 10;
-    bool initial_vertex_attrib_array_enabled[nvertex_attribs];
-
-    // The next two functions are stolen from opengl.cpp
-    // and are used to parse the major/minor version of OpenGL
-    // to see if vertex array objects are supported
-    const char *parse_int(const char *str, int *val) {
-        int v = 0;
-        size_t i = 0;
-        while (str[i] >= '0' && str[i] <= '9') {
-            v = 10 * v + (str[i] - '0');
-            i++;
-        }
-        if (i > 0) {
-            *val = v;
-            return &str[i];
-        }
-        return nullptr;
-    }
-
-    const char *parse_opengl_version(const char *str, int *major, int *minor) {
-        str = parse_int(str, major);
-        if (str == nullptr || *str != '.') {
-            return nullptr;
-        }
-        return parse_int(str + 1, minor);
-    }
-
-    GLuint initial_vertex_array_binding;
-
-public:
-    bool errors{false};
-
-    // This sets most values to generated or arbitrary values, which the
-    // halide calls would be unlikely to accidentally use.  But for boolean
-    // values, we want to be sure that halide is really restoring the
-    // initial value, not just setting it to true or false.  So we need to
-    // be able to try both.
-    void setup(bool boolval) {
-        // parse the OpenGL version
-        const char *version = (const char *)glGetString(GL_VERSION);
-        parse_opengl_version(version, &gl_major_version, &gl_minor_version);
-
-        glGenTextures(ntextures, initial_bound_textures);
-        for (int i = 0; i < ntextures; i++) {
-            glActiveTexture(GL_TEXTURE0 + i);
-            glBindTexture(GL_TEXTURE_2D, initial_bound_textures[i]);
-        }
-        glActiveTexture(initial_active_texture = GL_TEXTURE3);
-
-        // Vertex array objects are only used by Halide if the OpenGL version >=3
-        if (gl_major_version >= 3) {
-            glBindVertexArray(initial_vertex_array_binding = gl_gen(glGenVertexArrays));
-        }
-
-        for (int i = 0; i < nvertex_attribs; i++) {
-            if ((initial_vertex_attrib_array_enabled[i] = boolval)) {
-                glEnableVertexAttribArray(i);
-            } else {
-                glDisableVertexAttribArray(i);
-            }
-            char buf[256];
-            sprintf(buf, "vertex attrib array %d state", i);
-            check_error(buf);
-        }
-
-        glUseProgram(initial_current_program = Program::handle());
-        glViewport(initial_viewport[0] = 111, initial_viewport[1] = 222, initial_viewport[2] = 333, initial_viewport[3] = 444);
-        gl_enable(GL_CULL_FACE, initial_cull_face = boolval);
-        gl_enable(GL_DEPTH_TEST, initial_depth_test = boolval);
-        glBindBuffer(GL_ARRAY_BUFFER, initial_array_buffer_binding = gl_gen(glGenBuffers));
-        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, initial_element_array_buffer_binding = gl_gen(glGenBuffers));
-        glBindFramebuffer(GL_FRAMEBUFFER, initial_framebuffer_binding = gl_gen(glGenFramebuffers));
-
-        check_error("known state");
-    }
-
-    void check(const char *operation) {
-        check_value(operation, "ActiveTexture", GL_ACTIVE_TEXTURE, initial_active_texture);
-        check_value(operation, "current program", GL_CURRENT_PROGRAM, initial_current_program);
-        check_value(operation, "framebuffer binding", GL_FRAMEBUFFER_BINDING, initial_framebuffer_binding);
-        check_value(operation, "array buffer binding", GL_ARRAY_BUFFER_BINDING, initial_array_buffer_binding);
-        check_value(operation, "element array buffer binding", GL_ELEMENT_ARRAY_BUFFER_BINDING, initial_element_array_buffer_binding);
-        check_value(operation, "viewport", GL_VIEWPORT, initial_viewport);
-        check_value(operation, "GL_CULL_FACE", GL_CULL_FACE, initial_cull_face);
-        check_value(operation, "GL_DEPTH_TEST", GL_DEPTH_TEST, initial_cull_face);
-
-        // Vertex array objects are only used by Halide if the OpenGL version >=3
-        if (gl_major_version >= 3) {
-            check_value(operation, "vertex array binding", GL_VERTEX_ARRAY_BINDING, initial_vertex_array_binding);
-        } else {
-            fprintf(stderr, "Skipping vertex array binding tests because OpenGL version is %d.%d (<3.0)\n", gl_major_version, gl_minor_version);
-        }
-
-        for (int i = 0; i < ntextures; i++) {
-            char buf[100];
-            sprintf(buf, "bound texture (unit %d)", i);
-            glActiveTexture(GL_TEXTURE0 + i);
-            check_value(operation, buf, GL_TEXTURE_BINDING_2D, initial_bound_textures[i]);
-        }
-
-        for (int i = 0; i < nvertex_attribs; i++) {
-            int initial = initial_vertex_attrib_array_enabled[i];
-            GLint val;
-            glGetVertexAttribiv(i, GL_VERTEX_ATTRIB_ARRAY_ENABLED, &val);
-            if (val != initial) {
-                fprintf(stderr, "%s did not restore boolean VertexAttributeArrayEnabled(%d): initial value was %s, current value is %s\n", operation, i, initial ? "true" : "false", val ? "true" : "false");
-                errors = true;
-            }
-        }
-    }
-};
-
-using namespace Halide;
-
-int main() {
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    KnownState known_state;
-
-    Buffer<uint8_t> input(255, 10, 3);
-    Buffer<uint8_t> out(UInt(8), 255, 10, 3);
-
-    Var x, y, c;
-    Func g;
-    g(x, y, c) = input(x, y, c);
-    g.bound(c, 0, 3);
-    g.glsl(x, y, c);
-    g.realize(out, target);  // let Halide initialize OpenGL
-
-    known_state.setup(true);
-    g.realize(out, target);
-    known_state.check("realize");
-
-    known_state.setup(true);
-    out.copy_to_host();
-    known_state.check("copy_to_host");
-
-    known_state.setup(false);
-    g.realize(out, target);
-    known_state.check("realize");
-
-    known_state.setup(false);
-    out.copy_to_host();
-    known_state.check("copy_to_host");
-
-    if (known_state.errors) {
-        return 1;
-    }
-
-    printf("Success!\n");
-    return 0;
-}
-
-#endif
diff --git a/test/opengl/select.cpp b/test/opengl/select.cpp
deleted file mode 100644
index f4c358b43e70..000000000000
--- a/test/opengl/select.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-int test_per_channel_select() {
-
-    printf("Testing select of channel.\n");
-
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Func gpu("gpu"), cpu("cpu");
-    Var x("x"), y("y"), c("c");
-
-    gpu(x, y, c) = cast<uint8_t>(mux(c, {128, x, y, x * y}));
-    gpu.bound(c, 0, 4);
-    gpu.glsl(x, y, c);
-    gpu.compute_root();
-
-    cpu(x, y, c) = gpu(x, y, c);
-
-    Buffer<uint8_t> out(10, 10, 4);
-    cpu.realize(out, target);
-
-    // Verify the result
-    if (!Testing::check_result<uint8_t>(out, [&](int x, int y, int c) {
-	    switch (c) {
-		case 0: return 128;
-		case 1: return x;
-		case 2: return y;
-		default: return x*y;
-	    } })) {
-        return 1;
-    }
-
-    return 0;
-}
-
-int test_flag_scalar_select() {
-
-    printf("Testing select of scalar value with flag.\n");
-
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Func gpu("gpu"), cpu("cpu");
-    Var x("x"), y("y"), c("c");
-
-    int flag_value = 0;
-
-    Param<int> flag("flag");
-    flag.set(flag_value);
-
-    gpu(x, y, c) = cast<uint8_t>(select(flag != 0, 128,
-                                        255));
-    gpu.bound(c, 0, 4);
-    gpu.glsl(x, y, c);
-    gpu.compute_root();
-
-    // This should trigger a copy_to_host operation
-    cpu(x, y, c) = gpu(x, y, c);
-
-    Buffer<uint8_t> out(10, 10, 4);
-    cpu.realize(out, target);
-
-    // Verify the result
-    if (!Testing::check_result<uint8_t>(out, [&](int x, int y, int c) {
-            return !flag_value ? 255 : 128;
-        })) {
-        return 1;
-    }
-
-    return 0;
-}
-
-int test_flag_pixel_select() {
-
-    printf("Testing select of pixel value with flag.\n");
-
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Func gpu("gpu"), cpu("cpu");
-    Var x("x"), y("y"), c("c");
-
-    int flag_value = 0;
-
-    Param<int> flag("flag");
-    flag.set(flag_value);
-
-    Buffer<uint8_t> image(10, 10, 4);
-    for (int y = 0; y < image.height(); y++) {
-        for (int x = 0; x < image.width(); x++) {
-            for (int c = 0; c < image.channels(); c++) {
-                image(x, y, c) = 128;
-            }
-        }
-    }
-
-    gpu(x, y, c) = cast<uint8_t>(select(flag != 0, image(x, y, c),
-                                        255));
-    gpu.bound(c, 0, 4);
-    gpu.glsl(x, y, c);
-    gpu.compute_root();
-
-    // This should trigger a copy_to_host operation
-    cpu(x, y, c) = gpu(x, y, c);
-
-    Buffer<uint8_t> out(10, 10, 4);
-    cpu.realize(out, target);
-
-    // Verify the result
-    if (!Testing::check_result<uint8_t>(out, [&](int x, int y, int c) {
-            return !flag_value ? 255 : 128;
-        })) {
-        return 1;
-    }
-
-    return 0;
-}
-
-int test_nested_select() {
-
-    printf("Testing nested select.\n");
-
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    // Define the algorithm.
-    Var x("x"), y("y"), c("c");
-    Func f("f");
-    Expr temp = cast<uint8_t>(select(x == 0, 1, 2));
-    f(x, y, c) = select(y == 0, temp, 255 - temp);
-
-    // Schedule f to run on the GPU.
-    const int channels = 3;
-    f.bound(c, 0, channels).glsl(x, y, c);
-
-    // Generate the result.
-    const int width = 10, height = 10;
-    Buffer<uint8_t> out = f.realize(width, height, channels, target);
-
-    // Check the result.
-    int errors = 0;
-    out.for_each_element([&](int x, int y, int c) {
-        uint8_t temp = x == 0 ? 1 : 2;
-        uint8_t expected = y == 0 ? temp : 255 - temp;
-        uint8_t actual = out(x, y, c);
-        if (expected != actual && ++errors == 1) {
-            fprintf(stderr, "out(%d, %d, %d) = %d instead of %d\n",
-                    x, y, c, actual, expected);
-        }
-    });
-
-    return errors;
-}
-
-int test_nested_select_varying() {
-
-    printf("Testing nested select with varying condition.\n");
-
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    // Define the algorithm.
-    Var x("x"), y("y"), c("c");
-    Func f("f");
-    Expr temp = cast<uint8_t>(select(x - c > 0, 1, 2));
-    f(x, y, c) = select(y == 0, temp, 255 - temp);
-
-    // Schedule f to run on the GPU.
-    const int channels = 3;
-    f.bound(c, 0, channels).glsl(x, y, c);
-
-    // Generate the result.
-    const int width = 10, height = 10;
-    Buffer<uint8_t> out = f.realize(width, height, channels, target);
-
-    // Check the result.
-    int errors = 0;
-    out.for_each_element([&](int x, int y, int c) {
-        uint8_t temp = x - c > 0 ? 1 : 2;
-        uint8_t expected = y == 0 ? temp : 255 - temp;
-        uint8_t actual = out(x, y, c);
-        if (expected != actual && ++errors == 1) {
-            fprintf(stderr, "out(%d, %d, %d) = %d instead of %d\n",
-                    x, y, c, actual, expected);
-        }
-    });
-
-    return errors;
-}
-
-int main() {
-
-    int err = 0;
-
-    err |= test_per_channel_select();
-    err |= test_flag_scalar_select();
-    err |= test_flag_pixel_select();
-    err |= test_nested_select();
-    err |= test_nested_select_varying();
-
-    if (err) {
-        printf("FAILED\n");
-        return 1;
-    }
-
-    printf("Success!\n");
-    return 0;
-}
diff --git a/test/opengl/set_pixels.cpp b/test/opengl/set_pixels.cpp
deleted file mode 100644
index 7c282878af0b..000000000000
--- a/test/opengl/set_pixels.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-int main() {
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Func f;
-    Var x, y, c;
-
-    f(x, y, c) = cast<uint8_t>(42);
-
-    Buffer<uint8_t> out(10, 10, 3);
-    f.bound(c, 0, 3).glsl(x, y, c);
-    f.realize(out, target);
-
-    out.copy_to_host();
-    if (!Testing::check_result<uint8_t>(out, [](int x, int y, int c) { return 42; })) {
-        return 1;
-    }
-
-    printf("Success!\n");
-    return 0;
-}
diff --git a/test/opengl/shifted_domains.cpp b/test/opengl/shifted_domains.cpp
deleted file mode 100644
index 38e2e81b2771..000000000000
--- a/test/opengl/shifted_domains.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-// This test executes a simple kernel with a non-zero min value. The code is
-// adapted from lesson_06_realizing_over_shifted_domains.cpp and scheduled for
-// GLSL
-int shifted_domains() {
-
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    int errors = 0;
-
-    Func gradient("gradient");
-    Var x("x"), y("y"), c("c");
-    gradient(x, y, c) = cast<float>(x + y);
-
-    gradient.bound(c, 0, 1);
-    gradient.glsl(x, y, c);
-
-    printf("Evaluating gradient from (0, 0) to (7, 7)\n");
-    Buffer<float> result(8, 8, 1);
-    gradient.realize(result, target);
-    result.copy_to_host();
-
-    if (!Testing::check_result<float>(result, 5e-5f, [](int x, int y) { return float(x + y); }))
-        errors++;
-
-    Buffer<float> shifted(5, 7, 1);
-    shifted.set_min(100, 50);
-
-    printf("Evaluating gradient from (100, 50) to (104, 56)\n");
-
-    gradient.realize(shifted, target);
-    shifted.copy_to_host();
-
-    if (!Testing::check_result<float>(shifted, 5e-5f, [](int x, int y) { return float(x + y); }))
-        errors++;
-
-    // Test with a negative min
-    shifted.set_min(-100, -50);
-
-    printf("Evaluating gradient from (-100, -50) to (-96, -44)\n");
-
-    gradient.realize(shifted, target);
-    shifted.copy_to_host();
-
-    if (!Testing::check_result<float>(shifted, 5e-5f, [](int x, int y) { return float(x + y); }))
-        errors++;
-
-    return errors;
-}
-
-int main() {
-
-    if (shifted_domains() != 0) {
-        return 1;
-    }
-
-    printf("Success!\n");
-    return 0;
-}
diff --git a/test/opengl/special_funcs.cpp b/test/opengl/special_funcs.cpp
deleted file mode 100644
index 677bf05a23c0..000000000000
--- a/test/opengl/special_funcs.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-#include "Halide.h"
-#include <algorithm>
-#include <iostream>
-#include <stdio.h>
-#include <stdlib.h>
-
-using namespace Halide;
-
-Var x, y, c;
-
-double square(double x) {
-    return x * x;
-}
-
-template<typename T>
-void test_function(Expr e, Buffer<T> &cpu_result, Buffer<T> &gpu_result) {
-    Func cpu("cpu"), gpu("gpu");
-
-    Target cpu_target = get_host_target();
-    Target gpu_target = get_host_target().with_feature(Target::OpenGL);
-    cpu(x, y, c) = e;
-    gpu(x, y, c) = e;
-
-    cpu.realize(cpu_result, cpu_target);
-
-    gpu.bound(c, 0, 3).glsl(x, y, c);
-    gpu.realize(gpu_result, gpu_target);
-    gpu_result.copy_to_host();
-}
-
-template<typename T>
-bool test_exact(Expr r, Expr g, Expr b) {
-    Expr e = cast<T>(mux(c, {r, g, b}));
-    const int W = 256, H = 256;
-    Buffer<T> cpu_result(W, H, 3);
-    Buffer<T> gpu_result(W, H, 3);
-    test_function(e, cpu_result, gpu_result);
-
-    for (int y = 0; y < gpu_result.height(); y++) {
-        for (int x = 0; x < gpu_result.width(); x++) {
-            if (!(gpu_result(x, y, 0) == cpu_result(x, y, 0) &&
-                  gpu_result(x, y, 1) == cpu_result(x, y, 1) &&
-                  gpu_result(x, y, 2) == cpu_result(x, y, 2))) {
-                std::cerr << "Incorrect pixel for " << e << " at (" << x << ", " << y << ")\n"
-                          << "  ("
-                          << (int)gpu_result(x, y, 0) << ", "
-                          << (int)gpu_result(x, y, 1) << ", "
-                          << (int)gpu_result(x, y, 2) << ") != ("
-                          << (int)cpu_result(x, y, 0) << ", "
-                          << (int)cpu_result(x, y, 1) << ", "
-                          << (int)cpu_result(x, y, 2)
-                          << ")\n";
-                return false;
-            }
-        }
-    }
-    return true;
-}
-
-template<typename T>
-bool test_approx(Expr r, Expr g, Expr b, double rms_error) {
-    Expr e = cast<T>(mux(c, {r, g, b}));
-    const int W = 256, H = 256;
-    Buffer<T> cpu_result(W, H, 3);
-    Buffer<T> gpu_result(W, H, 3);
-    test_function(e, cpu_result, gpu_result);
-
-    double err = 0.0;
-    for (int y = 0; y < gpu_result.height(); y++) {
-        for (int x = 0; x < gpu_result.width(); x++) {
-            err += square(gpu_result(x, y, 0) - cpu_result(x, y, 0));
-            err += square(gpu_result(x, y, 1) - cpu_result(x, y, 1));
-            err += square(gpu_result(x, y, 2) - cpu_result(x, y, 2));
-        }
-    }
-    err = sqrt(err / (W * H));
-    if (err > rms_error) {
-        std::cerr << "RMS error too large for " << e << ": "
-                  << err << " > " << rms_error << "\n";
-        return false;
-    } else {
-        return true;
-    }
-}
-
-int main() {
-
-    int errors = 0;
-
-    if (!test_exact<uint8_t>(0, 0, 0)) {
-        printf("Failed constant value test\n");
-        errors++;
-    }
-    if (!test_exact<uint8_t>(clamp(x + y, 0, 255), 0, 0)) {
-        printf("Failed clamp test\n");
-        errors++;
-    }
-
-    if (!test_exact<uint8_t>(
-            max(x, y),
-            cast<int>(min(cast<float>(x), cast<float>(y))),
-            clamp(x, 0, 10))) {
-        printf("Failed min/max test\n");
-        errors++;
-    }
-
-    if (!test_exact<float>(trunc(x + 0.25f), trunc(-(x + 0.75f)), 0.0f)) {
-        printf("Failed trunc test\n");
-        errors++;
-    }
-
-    // Trigonometric functions in GLSL are fast but not very accurate,
-    // especially outside of 0..2pi.
-    // The GLSL ES 1.0 spec does not define the precision of these operations
-    // so a wide error bound is used in this test.
-    Expr r = (256 * x + y) / ceilf(65536.f / (2 * 3.1415926536f));
-    if (!test_approx<float>(sin(r), cos(r), 0.0f, 5e-2)) {
-        errors++;
-        printf("Failed trigonometric test\n");
-    }
-
-    // TODO: the test must account for differences in default rounding behavior
-    // between the CPU and GPU for float <-> integer conversions. In this case
-    // the operation is performed in float in the GLSL shader, and then
-    // converted back to a normalized integer value.
-    if (!test_approx<uint8_t>(
-            (x - 127) / 3 + 127,
-            (x - 127) % 3 + 127,
-            0,
-            1)) {
-        printf("Failed integer operation test\n");
-        errors++;
-    }
-
-    if (!test_exact<uint8_t>(
-            lerp(cast<uint8_t>(x), cast<uint8_t>(y), cast<uint8_t>(128)),
-            lerp(cast<uint8_t>(x), cast<uint8_t>(y), 0.5f),
-            cast<uint8_t>(lerp(cast<float>(x), cast<float>(y), 0.2f)))) {
-        printf("Failed lerp test\n");
-        errors++;
-    }
-
-    if (errors == 0) {
-        printf("Success!\n");
-        return 0;
-    } else {
-        printf("FAILED %d tests\n", errors);
-        return 1;
-    }
-}
diff --git a/test/opengl/sum_reduction.cpp b/test/opengl/sum_reduction.cpp
deleted file mode 100644
index 97fd40d5905c..000000000000
--- a/test/opengl/sum_reduction.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-int main() {
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    // Define the input
-    const int width = 10, height = 10, channels = 4;
-    Buffer<float> input(width, height, channels);
-    input.fill([](int x, int y, int c) {
-        return float(x + y);
-    });
-
-    // Define the algorithm.
-    Var x, y, c;
-    RDom r(0, 5, "r");
-    Func g;
-    Expr coordx = clamp(x + r, 0, input.width() - 1);
-    g(x, y, c) = cast<float>(sum(input(coordx, y, c)) / sum(r) * 255.0f);
-
-    // Schedule f and g to compute in separate passes on the GPU.
-    g.bound(c, 0, 4).glsl(x, y, c);
-
-    // Generate the result.
-    Buffer<float> result = g.realize(width, height, channels, target);
-    result.copy_to_host();
-
-    // Check the result.
-    if (!Testing::check_result<float>(result, 1e-3f, [&](int x, int y, int c) {
-            float temp = 0.0f;
-            for (int r = 0; r < 5; r++) {
-                temp += input(std::min(x + r, input.width() - 1), y, c);
-            }
-            return temp / 10.0f * 255.0f;
-        })) {
-        return 1;
-    }
-
-    printf("Success!\n");
-    return 0;
-}
diff --git a/test/opengl/sumcolor_reduction.cpp b/test/opengl/sumcolor_reduction.cpp
deleted file mode 100644
index 6532376061be..000000000000
--- a/test/opengl/sumcolor_reduction.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-int main() {
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    // Define the input.
-    const int width = 10, height = 10, channels = 3;
-    Buffer<float> input(width, height, channels);
-    input.fill([](int x, int y, int c) {
-        return x + y;
-    });
-
-    // Define the algorithm.
-    Var x, y, c;
-    RDom r(0, 3, "r");
-    Func g;
-
-    g(x, y, c) = sum(input(x, y, r));
-
-    // Schedule f and g to compute in separate passes on the GPU.
-    g.bound(c, 0, 3).glsl(x, y, c);
-
-    // Generate the result.
-    Buffer<float> result = g.realize(10, 10, 3, target);
-    result.copy_to_host();
-
-    // Check the result.
-    if (!Testing::check_result<float>(result, 1e-6f, [](int x, int y, int c) { return 3.0f * (x + y); })) {
-        return 1;
-    }
-
-    printf("Success!\n");
-    return 0;
-}
diff --git a/test/opengl/testing.h b/test/opengl/testing.h
deleted file mode 100644
index 860ea55c172c..000000000000
--- a/test/opengl/testing.h
+++ /dev/null
@@ -1,86 +0,0 @@
-#ifndef _TESTING_H_
-#define _TESTING_H_
-
-#include "Halide.h"
-#include <cmath>
-#include <exception>
-#include <functional>
-#include <iostream>
-
-namespace Testing {
-
-template<typename T>
-bool neq(T a, T b, T tol) {
-    return std::abs(a - b) > tol;
-}
-
-// Check 3-dimension buffer
-template<typename T, typename F>
-auto check_result(const Halide::Buffer<T> &buf, T tol, F f) -> decltype(std::declval<F>()(0, 0, 0), bool()) {
-    class err : std::exception {
-    public:
-        static void vector(const std::vector<T> &v) {
-            for (size_t i = 0; i < v.size(); i++) {
-                if (i > 0) {
-                    std::cerr << ",";
-                }
-                std::cerr << +v[i];  // use unary + to promote uint8_t from char to numeric
-            }
-        }
-    };
-    try {
-        buf.for_each_element([&](int x, int y) {
-            std::vector<T> expected;
-            std::vector<T> result;
-            for (int c = 0; c < buf.channels(); c++) {
-                expected.push_back(f(x, y, c));
-                result.push_back(buf(x, y, c));
-            }
-            for (int c = 0; c < buf.channels(); c++) {
-                if (neq(result[c], expected[c], tol)) {
-                    std::cerr << "Error: result (";
-                    err::vector(result);
-                    std::cerr << ") should be (";
-                    err::vector(expected);
-                    std::cerr << ") at x=" << x << " y=" << y << "\n";
-                    throw err();
-                }
-            }
-        });
-    } catch (err &) {
-        return false;
-    }
-    return true;
-}
-
-// Check 2-dimension buffer
-template<typename T, typename F>
-auto check_result(const Halide::Buffer<T> &buf, T tol, F f) -> decltype(std::declval<F>()(0, 0), bool()) {
-    class err : std::exception {};
-    try {
-        buf.for_each_element([&](int x, int y) {
-            const T expected = f(x, y);
-            const T result = buf(x, y);
-            if (neq(result, expected, tol)) {
-                std::cerr << "Error: result (";
-                std::cerr << +result;
-                std::cerr << ") should be (";
-                std::cerr << +expected;
-                std::cerr << ") at x=" << x << " y=" << y << "\n";
-                throw err();
-            }
-        });
-    } catch (err &) {
-        return false;
-    }
-    return true;
-}
-
-// Shorthand to check with tolerance=0
-template<typename T, typename Func>
-bool check_result(const Halide::Buffer<T> &buf, Func f) {
-    return check_result<T>(buf, 0, f);
-}
-}  // namespace Testing
-
-#endif  // _TESTING_H_
diff --git a/test/opengl/tuples.cpp b/test/opengl/tuples.cpp
deleted file mode 100644
index b4a834ffd1ca..000000000000
--- a/test/opengl/tuples.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-
-int main() {
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Buffer<uint8_t> input(255, 10, 3);
-    input.fill([](int x, int y, int c) {
-        return 10 * x + y + c;
-    });
-
-    Var x, y, c;
-    Func g;
-    g(x, y, c) = {input(x, y, c), input(x, y, c) / 2};
-
-    // h will be an opengl stage with tuple input. Tuple outputs
-    // aren't supported because OpenGL ES 2.0 doesn't support multiple
-    // output textures.
-    Func h;
-    h(x, y, c) = min(g(x, y, c)[0], g(x, y, c)[1]);
-
-    Buffer<uint8_t> out(255, 10, 3);
-    g.compute_root();
-    h.compute_root().bound(c, 0, 3).glsl(x, y, c);
-
-    h.realize(out, target);
-    out.copy_to_host();
-
-    if (!Testing::check_result<uint8_t>(out, [&](int x, int y, int c) { return input(x, y, c) / 2; })) {
-        return 1;
-    }
-
-    printf("Success!\n");
-    return 0;
-}
diff --git a/test/opengl/vagrant/.gitignore b/test/opengl/vagrant/.gitignore
deleted file mode 100644
index 8000dd9db47c..000000000000
--- a/test/opengl/vagrant/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-.vagrant
diff --git a/test/opengl/vagrant/README.md b/test/opengl/vagrant/README.md
deleted file mode 100644
index febf3be7d1ad..000000000000
--- a/test/opengl/vagrant/README.md
+++ /dev/null
@@ -1,136 +0,0 @@
-# Testing OpenGL on Ubuntu 14 & 16 using vagrant & VirtualBox
-
-## Overview
-
-This subdirectory (`Halide/test/opengl/vagrant`) provides the setup to build
-Halide and run the OpenGL tests headlessly on Ubuntu 14.04 and/or 16.04, running
-virtually under [vagrant](http://vagrantup.com) and
-[VirtualBox](https://www.virtualbox.org).
-
-This is intended in particular for use by those who develop Halide's OpenGL
-back-end on OS X and need to test on Linux.
-
-The `Vagrantfile` provisions with the necessary capabilities to build Halide and
-build & run Halide's OpenGL test suite. In particular it installs llvm-3.8 and
-OpenGL with software rendering to a dummy X server.
-
-## Quick instructions
-
-Presuming that you have [vagrant](http://vagrantup.com) and
-[VirtualBox](https://www.virtualbox.org) installed,
-
-```
-$ cd Halide/test/opengl/vagrant
-$ vagrant up [u14|u16]
-[...]
-$ vagrant ssh [u14|u16] -c "sh /vagrant/build_tests.sh"
-[...]
-```
-
-The `[u14|u16]` argument is optional, the default is `u16` to use the Ubuntu
-16.04 virtual machine. Specify `u14` to use the Ubuntu 14.04 macihne.
-
-After a bit of time and a lot of verbiage, you should eventually see the `make`
-output for building and running the OpenGL tests
-
-## Detailed instructions
-
-### Starting and provisioning the virtual machine(s)
-
-As per above, you can start the machines using
-
-```
-$ cd Halide/test/opengl/vagrant
-$ vagrant up [u14|u16]
-[...]
-```
-
-The first time you run it for a given machine, it will download the necessary
-base box, then boot and provision the machine. This will take several minutes.
-
-You may notice some errors or warnings in the output of `vagrant up`'s
-provisioning; these can be safely ignored. (In particular for `u16` the output
-ends with `ttyname failed: Inappropriate ioctl for device` which looks omnious
-but is harmless.)
-
-As usual, you can stop or power down the machine using
-`vagrant suspend [u14|u16]` or `vagrant halt [u14|u16]`; subsequently starting
-it up again using `vagrant up [u14|u16]` should be reasonably quick. For more
-info, see the `vagrant help` or the [vagrant](http://vagrantup.com) docs.
-
-See the `Vagrantfile` for the specific details of what gets provisioned.
-
-### Building Halide and running the tests
-
-The virtual machine has these directories live-shared with the host:
-
-- `/Halide` - The root of your Halide source tree
-- `/vagrant` - The vagrant work directory. I.e. effectively a hard link to
-  `/Halide/test/opengl/vagrant`
-
-Because these are live shared, you can edit Halide source files on your host
-machine but build them on the virtual machine.
-
-The script `build_tests.sh`, run on the virtual machine, is just a quick
-shorthand to minimize the amount of typing, letting you build and run everything
-at once from the host via
-
-```
-$ vagrant ssh [u14|u16] -c "sh /vagrant/build_tests.sh"
-```
-
-But of course for more focused development & debugging you might want to do
-things one step at a time:
-
-```
-$ vagrant ssh [u14|u16]
-[...Ubuntu motd...]
-vagrant@vagrant:~$
-```
-
-These are the steps taken by `build_tests.sh`:
-
-#### 1. Create an out-of-tree build directory
-
-```
-vagrant@vagrant:~$ mkdir ~/halide_build
-vagrant@vagrant:~$ cd ~/halide_build
-vagrant@vagrant:~/halide_build$ ln -s /Halide/Makefile .
-```
-
-It's important to build out-of-tree, because `/Halide` tree is live shared and
-we don't want the virtual machine's object files to clobber the host object
-files!
-
-#### 2. Build Halide
-
-Nothing special here, just build normally, e.g.:
-
-```
-vagrant@vagrant:~/halide_build$ make -j 3
-```
-
-The machine is provisioned with environment variables `LLVM_CONFIG` globally set
-appropriately.
-
-#### 3. Build & run the OpenGL tests
-
-Again nothing special here, just build the opengl tests normally, e.g.:
-
-```
-vagrant@vagrant:~/halide_build$ make -k test_opengl
-```
-
-Or of course you can build and run just one test, e.g.:
-
-```
-vagrant@vagrant:~/halide_build$ make opengl_float_texture
-```
-
-The machine is provisioned with environment variables `HL_TARGET` and
-`HL_JIT_TARGET` set to `host-opengl`. You can of course override in your shell,
-e.g. if you want to use `host-opengl-debug`.
-
-The machine is provisioned with `lldb` installed in case you need to do some
-debugging. Aside from that it's bare-bones; if you need anything else for your
-debugging or development you will need to `apt-get install` it.
diff --git a/test/opengl/vagrant/Vagrantfile b/test/opengl/vagrant/Vagrantfile
deleted file mode 100644
index 5d7fef1a0afe..000000000000
--- a/test/opengl/vagrant/Vagrantfile
+++ /dev/null
@@ -1,118 +0,0 @@
-# -*- mode: ruby -*-
-# vi: set ft=ruby :
-
-# All Vagrant configuration is done below. The "2" in Vagrant.configure
-# configures the configuration version (we support older styles for
-# backwards compatibility). Please don't change it unless you know what
-# you're doing.
-Vagrant.configure("2") do |config|
-  # The most common configuration options are documented and commented below.
-  # For a complete reference, please see the online documentation at
-  # https://docs.vagrantup.com.
-
-  # Every Vagrant development environment requires a box. You can search for
-  # boxes at https://atlas.hashicorp.com/search.
-  config.vm.define "u14", autostart: false do |u14|
-    u14.vm.box = "bento/ubuntu-14.04"
-    u14.vm.provision "shell", inline: <<-SHELL
-        # Create and start headless X service using upstart
-        cp /vagrant/provision/etc/init/xdummy.conf /etc/init/
-        service xdummy start
-    SHELL
-  end
-  config.vm.define "u16", primary: true do |u16|
-    u16.vm.box = "bento/ubuntu-16.04"
-    u16.vm.provision "shell", inline: <<-SHELL
-        # Create and start headless X service using systemd
-        cp /vagrant/provision/etc/systemd/system/xdummy.service /etc/systemd/system/
-        systemctl start xdummy
-    SHELL
-  end
-
-  config.vm.boot_timeout = 600
-
-
-  # Disable automatic box update checking. If you disable this, then
-  # boxes will only be checked for updates when the user runs
-  # `vagrant box outdated`. This is not recommended.
-  # config.vm.box_check_update = false
-
-  # Create a forwarded port mapping which allows access to a specific port
-  # within the machine from a port on the host machine. In the example below,
-  # accessing "localhost:8080" will access port 80 on the guest machine.
-  # config.vm.network "forwarded_port", guest: 80, host: 8080
-
-  # Create a private network, which allows host-only access to the machine
-  # using a specific IP.
-  # config.vm.network "private_network", ip: "192.168.33.10"
-
-  # Create a public network, which generally matched to bridged network.
-  # Bridged networks make the machine appear as another physical device on
-  # your network.
-  # config.vm.network "public_network"
-
-  # Share an additional folder to the guest VM. The first argument is
-  # the path on the host to the actual folder. The second argument is
-  # the path on the guest to mount the folder. And the optional third
-  # argument is a set of non-required options.
-  # config.vm.synced_folder "../data", "/vagrant_data"
-  config.vm.synced_folder "../../..", "/Halide"
-
-  # Provider-specific configuration so you can fine-tune various
-  # backing providers for Vagrant. These expose provider-specific options.
-  # Example for VirtualBox:
-  #
-  config.vm.provider "virtualbox" do |vb|
-    # Display the VirtualBox GUI when booting the machine
-    vb.gui = false
-
-    # Customize the amount of memory on the VM:
-    vb.memory = "2048"
-  end
-  #
-  # View the documentation for the provider you are using for more
-  # information on available options.
-
-  # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies
-  # such as FTP and Heroku are also available. See the documentation at
-  # https://docs.vagrantup.com/v2/push/atlas.html for more information.
-  # config.push.define "atlas" do |push|
-  #   push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME"
-  # end
-
-  # Enable provisioning with a shell script. Additional provisioners such as
-  # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the
-  # documentation for more information about their specific syntax and use.
-  config.vm.provision "shell", inline: <<-SHELL
-
-    # Global environment variables, both for system purposes (PATH, LC_ALL)
-    # and for convenience of building and running Halide opengl tests
-    # (DISPLAY, LLVM_CONFIG, CLANG, HL_JIT_TARGET)
-    cp /vagrant/provision/etc/environment /etc/environment
-
-    apt-get update
-
-    # Install resources for headless X service (final provisioning of the service is machine-specific)
-    apt-get install -y xserver-xorg-video-dummy
-    cp /vagrant/provision/usr/share/X11/xorg.conf.d/xdummy.conf /usr/share/X11/xorg.conf.d/xdummy.conf
-
-    # Install llvm-3.8 as llvm
-    apt-get install -y llvm-3.8 llvm-3.8-dev clang-3.8 lldb-3.8
-    for ll in /usr/bin/*-3.8 ; do ln -s -f $ll `echo $ll | sed -e s/-3.8//` ; done
-
-    # Build OpenGL (mesa) using software driver (gallium / llvmpipe). Can't
-    # use the prebuilt mesa packages because they expect video hardware drivers.
-    apt-get install -y build-essential scons python-mako flex bison zlib1g-dev libudev-dev pkg-config libx11-dev libxext-dev libxdamage-dev x11proto-gl-dev libx11-xcb-dev
-    cd /usr/local/src
-    test -f mesa-12.0.2.tar.xz || wget -q https://mesa.freedesktop.org/archive/12.0.2/mesa-12.0.2.tar.xz
-    test -d mesa-12.0.2 || tar xkf mesa-12.0.2.tar.xz
-    cd mesa-12.0.2
-    scons build=release texture_float=yes libgl-xlib
-    ln -s -f `pwd`/include/GL* /usr/local/include/
-    cp `pwd`/build/linux-x86_64/gallium/targets/libgl-xlib/libGL.* /usr/local/lib
-    ldconfig
-
-    # Machine-specific provisioning will happpen next
-  SHELL
-
-end
diff --git a/test/opengl/vagrant/build_tests.sh b/test/opengl/vagrant/build_tests.sh
deleted file mode 100755
index 54dec279c28a..000000000000
--- a/test/opengl/vagrant/build_tests.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/sh -x
-mkdir -p ~/halide_build
-cd ~/halide_build
-ln -s -f /Halide/Makefile .
-make -j 3
-make -k test_opengl
diff --git a/test/opengl/vagrant/provision/etc/environment b/test/opengl/vagrant/provision/etc/environment
deleted file mode 100644
index 2ab25818fbad..000000000000
--- a/test/opengl/vagrant/provision/etc/environment
+++ /dev/null
@@ -1,7 +0,0 @@
-PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games"
-LC_ALL=C
-DISPLAY=:0.0
-LLVM_CONFIG=/usr/bin/llvm-config-3.8
-CLANG=/usr/bin/clang-3.8
-HL_TARGET=host-opengl
-HL_JIT_TARGET=host-opengl
diff --git a/test/opengl/vagrant/provision/etc/init/xdummy.conf b/test/opengl/vagrant/provision/etc/init/xdummy.conf
deleted file mode 100644
index da5925809f41..000000000000
--- a/test/opengl/vagrant/provision/etc/init/xdummy.conf
+++ /dev/null
@@ -1,7 +0,0 @@
-description "Dummy X server providing DISPLAY=:0.0"
-
-expect fork
-
-script
-    /usr/bin/Xorg -noreset +extension GLX +extension RANDR +extension RENDER -logfile /var/log/Xorg.log :0 &
-end script
diff --git a/test/opengl/vagrant/provision/etc/systemd/system/xdummy.service b/test/opengl/vagrant/provision/etc/systemd/system/xdummy.service
deleted file mode 100644
index 8d0ce1a3c4d9..000000000000
--- a/test/opengl/vagrant/provision/etc/systemd/system/xdummy.service
+++ /dev/null
@@ -1,6 +0,0 @@
-[Unit]
-Description=Dummy X server providing DISPLAY=:0.0"
-
-[Service]
-Type=simple
-ExecStart=/usr/bin/Xorg -noreset +extension GLX +extension RANDR +extension RENDER -config /dev/null -logfile /var/log/Xorg.log :0
diff --git a/test/opengl/vagrant/provision/usr/share/X11/xorg.conf.d/xdummy.conf b/test/opengl/vagrant/provision/usr/share/X11/xorg.conf.d/xdummy.conf
deleted file mode 100644
index d31d944c32f3..000000000000
--- a/test/opengl/vagrant/provision/usr/share/X11/xorg.conf.d/xdummy.conf
+++ /dev/null
@@ -1,137 +0,0 @@
-# This xorg configuration file is meant to be used by xpra
-# to start a dummy X11 server.
-# For details, please see:
-# https://xpra.org/Xdummy.html
-
-Section "ServerFlags"
-  Option "DontVTSwitch" "true"
-  Option "AllowMouseOpenFail" "true"
-  Option "PciForceNone" "true"
-  Option "AutoEnableDevices" "false"
-  Option "AutoAddDevices" "false"
-EndSection
-
-Section "InputDevice"
-  Identifier "dummy_mouse"
-  Option "CorePointer" "true"
-  Driver "void"
-EndSection
-
-Section "InputDevice"
-  Identifier "dummy_keyboard"
-  Option "CoreKeyboard" "true"
-  Driver "void"
-EndSection
-
-Section "Device"
-  Identifier "dummy_videocard"
-  Driver "dummy"
-  Option "ConstantDPI" "true"
-  #VideoRam 4096000
-  #VideoRam 256000
-  VideoRam 192000
-EndSection
-
-Section "Monitor"
-  Identifier "dummy_monitor"
-  HorizSync   5.0 - 1000.0
-  VertRefresh 5.0 - 200.0
-  #This can be used to get a specific DPI, but only for the default resolution:
-  #DisplaySize 508 317
-  #NOTE: the highest modes will not work without increasing the VideoRam
-  # for the dummy video card.
-  Modeline "32768x32768" 15226.50 32768 35800 39488 46208 32768 32771 32781 32953
-  Modeline "32768x16384" 7516.25 32768 35544 39192 45616 16384 16387 16397 16478
-  Modeline "16384x8192" 2101.93 16384 16416 24400 24432 8192 8390 8403 8602
-  Modeline "8192x4096" 424.46 8192 8224 9832 9864 4096 4195 4202 4301
-  Modeline "5496x1200" 199.13 5496 5528 6280 6312 1200 1228 1233 1261
-  Modeline "5280x1080" 169.96 5280 5312 5952 5984 1080 1105 1110 1135
-  Modeline "5280x1200" 191.40 5280 5312 6032 6064 1200 1228 1233 1261
-  Modeline "5120x3200" 199.75 5120 5152 5904 5936 3200 3277 3283 3361
-  Modeline "4800x1200" 64.42 4800 4832 5072 5104 1200 1229 1231 1261
-  Modeline "3840x2880" 133.43 3840 3872 4376 4408 2880 2950 2955 3025
-  Modeline "3840x2560" 116.93 3840 3872 4312 4344 2560 2622 2627 2689
-  Modeline "3840x2048" 91.45 3840 3872 4216 4248 2048 2097 2101 2151
-  Modeline "3840x1080" 100.38 3840 3848 4216 4592 1080 1081 1084 1093
-  Modeline "3600x1200" 106.06 3600 3632 3984 4368 1200 1201 1204 1214
-  Modeline "3288x1080" 39.76 3288 3320 3464 3496 1080 1106 1108 1135
-  Modeline "2048x2048" 49.47 2048 2080 2264 2296 2048 2097 2101 2151
-  Modeline "2048x1536" 80.06 2048 2104 2312 2576 1536 1537 1540 1554
-  Modeline "2560x1600" 47.12 2560 2592 2768 2800 1600 1639 1642 1681
-  Modeline "2560x1440" 42.12 2560 2592 2752 2784 1440 1475 1478 1513
-  Modeline "1920x1440" 69.47 1920 1960 2152 2384 1440 1441 1444 1457
-  Modeline "1920x1200" 26.28 1920 1952 2048 2080 1200 1229 1231 1261
-  Modeline "1920x1080" 23.53 1920 1952 2040 2072 1080 1106 1108 1135
-  Modeline "1680x1050" 20.08 1680 1712 1784 1816 1050 1075 1077 1103
-  Modeline "1600x1200" 22.04 1600 1632 1712 1744 1200 1229 1231 1261
-  Modeline "1600x900" 33.92 1600 1632 1760 1792 900 921 924 946
-  Modeline "1440x900" 30.66 1440 1472 1584 1616 900 921 924 946
-  ModeLine "1366x768" 72.00 1366 1414 1446 1494  768 771 777 803
-  Modeline "1280x1024" 31.50 1280 1312 1424 1456 1024 1048 1052 1076
-  Modeline "1280x800" 24.15 1280 1312 1400 1432 800 819 822 841
-  Modeline "1280x768" 23.11 1280 1312 1392 1424 768 786 789 807
-  Modeline "1360x768" 24.49 1360 1392 1480 1512 768 786 789 807
-  Modeline "1024x768" 18.71 1024 1056 1120 1152 768 786 789 807
-  Modeline "768x1024" 19.50 768 800 872 904 1024 1048 1052 1076
-
-
-  #common resolutions for android devices (both orientations):
-  Modeline "800x1280" 25.89 800 832 928 960 1280 1310 1315 1345
-  Modeline "1280x800" 24.15 1280 1312 1400 1432 800 819 822 841
-  Modeline "720x1280" 30.22 720 752 864 896 1280 1309 1315 1345
-  Modeline "1280x720" 27.41 1280 1312 1416 1448 720 737 740 757
-  Modeline "768x1024" 24.93 768 800 888 920 1024 1047 1052 1076
-  Modeline "1024x768" 23.77 1024 1056 1144 1176 768 785 789 807
-  Modeline "600x1024" 19.90 600 632 704 736 1024 1047 1052 1076
-  Modeline "1024x600" 18.26 1024 1056 1120 1152 600 614 617 631
-  Modeline "536x960" 16.74 536 568 624 656 960 982 986 1009
-  Modeline "960x536" 15.23 960 992 1048 1080 536 548 551 563
-  Modeline "600x800" 15.17 600 632 688 720 800 818 822 841
-  Modeline "800x600" 14.50 800 832 880 912 600 614 617 631
-  Modeline "480x854" 13.34 480 512 560 592 854 873 877 897
-  Modeline "848x480" 12.09 848 880 920 952 480 491 493 505
-  Modeline "480x800" 12.43 480 512 552 584 800 818 822 841
-  Modeline "800x480" 11.46 800 832 872 904 480 491 493 505
-  #resolutions for android devices (both orientations)
-  #minus the status bar
-  #38px status bar (and width rounded up)
-  Modeline "800x1242" 25.03 800 832 920 952 1242 1271 1275 1305
-  Modeline "1280x762" 22.93 1280 1312 1392 1424 762 780 783 801
-  Modeline "720x1242" 29.20 720 752 856 888 1242 1271 1276 1305
-  Modeline "1280x682" 25.85 1280 1312 1408 1440 682 698 701 717
-  Modeline "768x986" 23.90 768 800 888 920 986 1009 1013 1036
-  Modeline "1024x730" 22.50 1024 1056 1136 1168 730 747 750 767
-  Modeline "600x986" 19.07 600 632 704 736 986 1009 1013 1036
-  Modeline "1024x562" 17.03 1024 1056 1120 1152 562 575 578 591
-  Modeline "536x922" 16.01 536 568 624 656 922 943 947 969
-  Modeline "960x498" 14.09 960 992 1040 1072 498 509 511 523
-  Modeline "600x762" 14.39 600 632 680 712 762 779 783 801
-  Modeline "800x562" 13.52 800 832 880 912 562 575 578 591
-  Modeline "480x810" 12.59 480 512 552 584 810 828 832 851
-  Modeline "848x442" 11.09 848 880 920 952 442 452 454 465
-  Modeline "480x762" 11.79 480 512 552 584 762 779 783 801
-EndSection
-
-Section "Screen"
-  Identifier "dummy_screen"
-  Device "dummy_videocard"
-  Monitor "dummy_monitor"
-  DefaultDepth 24
-  SubSection "Display"
-    Viewport 0 0
-    Depth 24
-    #Modes "32768x32768" "32768x16384" "16384x8192" "8192x4096" "5120x3200" "3840x2880" "3840x2560" "3840x2048" "2048x2048" "2560x1600" "1920x1440" "1920x1200" "1920x1080" "1600x1200" "1680x1050" "1600x900" "1400x1050" "1440x900" "1280x1024" "1366x768" "1280x800" "1024x768" "1024x600" "800x600" "320x200"
-    Modes "5120x3200" "3840x2880" "3840x2560" "3840x2048" "2048x2048" "2560x1600" "1920x1440" "1920x1200" "1920x1080" "1600x1200" "1680x1050" "1600x900" "1400x1050" "1440x900" "1280x1024" "1366x768" "1280x800" "1024x768" "1024x600" "800x600" "320x200"
-    #Virtual 32000 32000
-    #Virtual 16384 8192
-    Virtual 8192 4096
-    #Virtual 5120 3200
-  EndSubSection
-EndSection
-
-Section "ServerLayout"
-  Identifier   "dummy_layout"
-  Screen       "dummy_screen"
-  InputDevice  "dummy_mouse"
-  InputDevice  "dummy_keyboard"
-EndSection
diff --git a/test/opengl/varying.cpp b/test/opengl/varying.cpp
deleted file mode 100644
index 314136215c94..000000000000
--- a/test/opengl/varying.cpp
+++ /dev/null
@@ -1,218 +0,0 @@
-#include "Halide.h"
-#include <stdio.h>
-
-#include "testing.h"
-
-using namespace Halide;
-using namespace Halide::Internal;
-
-// This test exercises several use cases for the GLSL varying attributes
-// feature. This feature detects expressions that are linear in terms of the
-// loop variables of a .glsl(..) scheduled Func and uses graphics pipeline
-// interpolation to evaluate the expressions instead of evaluating them per
-// fragment in the Halide generated fragment shader. Common examples are texture
-// coordinates interpolated across a Func domain or texture coordinates
-// transformed by a matrix and interpolated across the domain. Both cases arise
-// when GLSL shaders are ported to Halide.
-
-// This is a mutator that injects code that counts the number of variables
-// tagged .varying
-#ifdef _MSC_VER
-#define DLLEXPORT __declspec(dllexport)
-#else
-#define DLLEXPORT
-#endif
-
-// This global variable is used to count the number of unique varying attribute
-// variables that appear in the lowered Halide IR.
-std::set<std::string> varyings;
-
-// This function is a HalideExtern used to add variables to the set. The tests
-// below check the total number of unique variables found--not the specific
-// names of the variables which are arbitrary.
-extern "C" DLLEXPORT const Variable *record_varying(const Variable *op) {
-    if (varyings.find(op->name) == varyings.end()) {
-        fprintf(stderr, "Found varying attribute: %s\n", op->name.c_str());
-        varyings.insert(op->name);
-    }
-    return op;
-}
-HalideExtern_1(const Variable *, record_varying, const Variable *);
-
-// This visitor inserts the above function in the IR tree.
-class CountVarying : public IRMutator {
-    using IRMutator::visit;
-
-    Expr visit(const Variable *op) override {
-        Expr expr = IRMutator::visit(op);
-        if (ends_with(op->name, ".varying")) {
-            expr = record_varying(op);
-        }
-        return expr;
-    }
-};
-
-bool perform_test(const char *label, const Target target, Func f, int expected_nvarying, float tol, std::function<float(int x, int y, int c)> expected_val) {
-    fprintf(stderr, "%s\n", label);
-
-    Buffer<float> out(8, 8, 3);
-
-    varyings.clear();
-    f.add_custom_lowering_pass(new CountVarying);
-    f.realize(out, target);
-
-    // Check for the correct number of varying attributes
-    if ((int)varyings.size() != expected_nvarying) {
-        fprintf(stderr,
-                "%s: Error: wrong number of varying attributes: %d should be %d\n",
-                label, (int)varyings.size(), expected_nvarying);
-        return false;
-    }
-
-    // Check for correct result values
-    out.copy_to_host();
-
-    if (!Testing::check_result<float>(out, tol, expected_val)) {
-        return false;
-    }
-
-    fprintf(stderr, "%s Passed!\n", label);
-    return true;
-}
-
-// This is a simple test case where there are two expressions that are not
-// linearly varying in terms of a loop variable and one expression that is.
-bool test0(const Target target, Var &x, Var &y, Var &c) {
-    float p_value = 8.0f;
-    Param<float> p("p");
-    p.set(p_value);
-
-    Func f0("f0");
-    f0(x, y, c) = mux(c, {4.0f,                       // Constant term
-                          p * 10.0f,                  // Linear expression not in terms of a loop parameter
-                          cast<float>(x) * 100.0f});  // Linear expression in terms of x
-
-    f0.bound(c, 0, 3);
-    f0.glsl(x, y, c);
-    return perform_test("Test0", target, f0, 2, 0.0f, [&](int x, int y, int c) {
-                switch (c) {
-                case 0: return 4.0f;
-                case 1: return p_value * 10.0f;
-                default: return static_cast<float>(x) * 100.0f;
-                } });
-}
-
-struct CoordXform {
-    const float th = 3.141592f / 8.0f;
-    const float s_th = sinf(th);
-    const float c_th = cosf(th);
-    const float m[6] = {
-        c_th, -s_th, 0.0f,
-        s_th, c_th, 0.0f};
-    Param<float> m0, m1, m2, m3, m4, m5;
-    CoordXform()
-        : m0("m0"), m1("m1"), m2("m2"), m3("m3"), m4("m4"), m5("m5") {
-        m0.set(m[0]);
-        m1.set(m[1]);
-        m2.set(m[2]);
-        m3.set(m[3]);
-        m4.set(m[4]);
-        m5.set(m[5]);
-    }
-};
-
-// This is a more complicated test case where several expressions are linear
-// in all of the loop variables. This is the coordinate transformation case
-bool test1(const Target target, Var &x, Var &y, Var &c) {
-    struct CoordXform m;
-    Func f1("f1");
-    f1(x, y, c) = mux(c, {m.m0 * x + m.m1 * y + m.m2,
-                          m.m3 * x + m.m4 * y + m.m5,
-                          1.0f});
-
-    f1.bound(c, 0, 3);
-    f1.glsl(x, y, c);
-
-    return perform_test("Test1", target, f1, 4, 0.000001f, [&](int x, int y, int c) {
-                switch (c) {
-                    case 0: return m.m[0] * x + m.m[1] * y + m.m[2];
-                    case 1: return m.m[3] * x + m.m[4] * y + m.m[5];
-                    default: return 1.0f;
-                } });
-}
-
-// The feature is supposed to find linearly varying sub-expressions as well
-// so for example, if the above expressions are wrapped in a non-linear
-// function like sqrt, they should still be extracted.
-bool test2(const Target target, Var &x, Var &y, Var &c) {
-    struct CoordXform m;
-    Func f2("f2");
-    f2(x, y, c) = mux(c, {sqrt(m.m0 * x + m.m1 * y + m.m2),
-                          sqrt(m.m3 * x + m.m4 * y + m.m5),
-                          1.0f});
-    f2.bound(c, 0, 3);
-    f2.glsl(x, y, c);
-
-    return perform_test("Test2", target, f2, 4, 0.000001f, [&](int x, int y, int c) {
-                switch (c) {
-                    case 0: return sqrtf(m.m[0] * x + m.m[1] * y + m.m[2]);
-                    case 1: return sqrtf(m.m[3] * x + m.m[4] * y + m.m[5]);
-                    default: return 1.0f;
-                } });
-}
-
-// This case tests a large expression linearly varying in terms of a loop
-// variable
-bool test3(const Target target, Var &x, Var &y, Var &c) {
-    float p_value = 8.0f;
-    Param<float> p("p");
-    p.set(p_value);
-    Expr foo = p;
-    for (int i = 0; i < 10; i++) {
-        foo = foo + foo + foo;
-    }
-    foo = x + foo;
-
-    float foo_value = p_value;
-    for (int i = 0; i < 10; i++) {
-        foo_value = foo_value + foo_value + foo_value;
-    }
-
-    Func f3("f3");
-    f3(x, y, c) = mux(c, {foo, 1.0f, 2.0f});
-
-    f3.bound(c, 0, 3);
-    f3.glsl(x, y, c);
-
-    return perform_test("Test3", target, f3, 2, 0.000001f, [&](int x, int y, int c) {
-                switch (c) {
-                    case 0: return (float)x + foo_value;
-                    case 1: return 1.0f;
-                    default: return 2.0f;
-                } });
-}
-
-int main() {
-    // This test must be run with an OpenGL target.
-    const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL);
-
-    Var x("x");
-    Var y("y");
-    Var c("c");
-
-    bool pass = true;
-    pass &= test0(target, x, y, c);
-    pass &= test1(target, x, y, c);
-    pass &= test2(target, x, y, c);
-    pass &= test3(target, x, y, c);
-    if (!pass) {
-        return 1;
-    }
-
-    // The test will return early on error.
-    fprintf(stderr, "Success!\n");
-
-    // This test may abort with the message "Failed to free device buffer" due
-    // to https://github.com/halide/Halide/issues/559
-    return 0;
-}