diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index e7d50e4d2891..ec763c9b0935 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -49,6 +49,5 @@ jobs: (cd test/error && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ]) (cd test/generator && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ]) (cd test/failing_with_issue && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ]) - (cd test/opengl && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ]) (cd test/performance && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ]) (cd test/warning && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ]) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9e288dd687d9..f7e1dbea8c97 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -148,12 +148,6 @@ jobs: libpng-dev \ ninja-build - # TODO(srj): OpenGL is only needed to build the opengl tests (which we don't even run)... - sudo apt-get install \ - freeglut3-dev \ - libglu1-mesa-dev \ - mesa-common-dev - - name: Configure MacOS Host if: startsWith(matrix.host_os, 'macos') shell: bash @@ -193,12 +187,6 @@ jobs: libjpeg-dev:i386 \ libpng-dev:i386 \ - # TODO(srj): OpenGL is only needed to build the opengl tests (which we don't even run)... 
- sudo apt-get install \ - freeglut3-dev:i386 \ - libglu1-mesa-dev:i386 \ - mesa-common-dev:i386 - - name: Configure Arm32 Crosscompilation if: matrix.target_os == 'linux' && matrix.target_arch == 'arm' && matrix.target_bits == 32 shell: bash @@ -449,9 +437,8 @@ jobs: TEST_GROUPS_SERIAL="tutorial" # performance is never going to be reliable on VMs. - # opengl won't work on the buildbots. # auto_schedule is just flaky. - TEST_GROUPS_BROKEN="performance opengl auto_schedule" + TEST_GROUPS_BROKEN="performance auto_schedule" if [[ ${{matrix.target_bits}} == 32 ]]; then # TODO: Skip testing apps on 32-bit systems for now; @@ -487,9 +474,8 @@ jobs: TEST_GROUPS_SERIAL="tutorial" # performance is never going to be reliable on VMs. - # opengl won't work on the buildbots. # auto_schedule is just flaky. - TEST_GROUPS_BROKEN="performance|opengl|auto_schedule" + TEST_GROUPS_BROKEN="performance|auto_schedule" export TEST_TMPDIR="${HALIDE_TEMP_DIR}" cd ${HALIDE_BUILD_DIR} diff --git a/Makefile b/Makefile index 8ad0a3970a0a..921f8e388e3b 100644 --- a/Makefile +++ b/Makefile @@ -466,7 +466,6 @@ SOURCE_FILES = \ ImageParam.cpp \ InferArguments.cpp \ InjectHostDevBufferCopies.cpp \ - InjectOpenGLIntrinsics.cpp \ Inline.cpp \ InlineReductions.cpp \ IntegerDivisionTable.cpp \ @@ -560,7 +559,6 @@ SOURCE_FILES = \ UnsafePromises.cpp \ Util.cpp \ Var.cpp \ - VaryingAttributes.cpp \ VectorizeLoops.cpp \ WasmExecutor.cpp \ WrapCalls.cpp @@ -645,7 +643,6 @@ HEADER_FILES = \ ImageParam.h \ InferArguments.h \ InjectHostDevBufferCopies.h \ - InjectOpenGLIntrinsics.h \ Inline.h \ InlineReductions.h \ IntegerDivisionTable.h \ @@ -728,7 +725,6 @@ HEADER_FILES = \ UnsafePromises.h \ Util.h \ Var.h \ - VaryingAttributes.h \ VectorizeLoops.h \ WrapCalls.h @@ -779,7 +775,6 @@ RUNTIME_CPP_COMPONENTS = \ msan \ msan_stubs \ opencl \ - opengl \ openglcompute \ opengl_egl_context \ opengl_glx_context \ @@ -851,7 +846,6 @@ RUNTIME_EXPORTED_INCLUDES = $(INCLUDE_DIR)/HalideRuntime.h \ 
$(INCLUDE_DIR)/HalideRuntimeHexagonDma.h \ $(INCLUDE_DIR)/HalideRuntimeHexagonHost.h \ $(INCLUDE_DIR)/HalideRuntimeOpenCL.h \ - $(INCLUDE_DIR)/HalideRuntimeOpenGL.h \ $(INCLUDE_DIR)/HalideRuntimeOpenGLCompute.h \ $(INCLUDE_DIR)/HalideRuntimeMetal.h \ $(INCLUDE_DIR)/HalideRuntimeQurt.h \ @@ -1110,14 +1104,11 @@ CORRECTNESS_TESTS = $(shell ls $(ROOT_DIR)/test/correctness/*.cpp) $(shell ls $( PERFORMANCE_TESTS = $(shell ls $(ROOT_DIR)/test/performance/*.cpp) ERROR_TESTS = $(shell ls $(ROOT_DIR)/test/error/*.cpp) WARNING_TESTS = $(shell ls $(ROOT_DIR)/test/warning/*.cpp) -OPENGL_TESTS := $(shell ls $(ROOT_DIR)/test/opengl/*.cpp) GENERATOR_EXTERNAL_TESTS := $(shell ls $(ROOT_DIR)/test/generator/*test.cpp) GENERATOR_EXTERNAL_TEST_GENERATOR := $(shell ls $(ROOT_DIR)/test/generator/*_generator.cpp) TUTORIALS = $(filter-out %_generate.cpp, $(shell ls $(ROOT_DIR)/tutorial/*.cpp)) AUTO_SCHEDULE_TESTS = $(shell ls $(ROOT_DIR)/test/auto_schedule/*.cpp) --include $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=$(BUILD_DIR)/test_opengl_%.d) - test_correctness: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=quiet_correctness_%) $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.c=quiet_correctness_%) test_performance: $(PERFORMANCE_TESTS:$(ROOT_DIR)/test/performance/%.cpp=performance_%) test_error: $(ERROR_TESTS:$(ROOT_DIR)/test/error/%.cpp=error_%) @@ -1125,7 +1116,6 @@ test_warning: $(WARNING_TESTS:$(ROOT_DIR)/test/warning/%.cpp=warning_%) test_tutorial: $(TUTORIALS:$(ROOT_DIR)/tutorial/%.cpp=tutorial_%) test_valgrind: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=valgrind_%) test_avx512: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=avx512_%) -test_opengl: $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=opengl_%) test_auto_schedule: test_mullapudi2016 test_li2018 test_adams2019 .PHONY: test_correctness_multi_gpu @@ -1230,7 +1220,6 @@ ALL_TESTS = test_internal test_correctness test_error test_tutorial test_warning # For generator tests they time the compile 
time only. The times are recorded in CSV files. time_compilation_correctness: init_time_compilation_correctness $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=time_compilation_test_%) time_compilation_performance: init_time_compilation_performance $(PERFORMANCE_TESTS:$(ROOT_DIR)/test/performance/%.cpp=time_compilation_performance_%) -time_compilation_opengl: init_time_compilation_opengl $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=time_compilation_opengl_%) time_compilation_generator: init_time_compilation_generator $(GENERATOR_TESTS:$(ROOT_DIR)/test/generator/%_aottest.cpp=time_compilation_generator_%) init_time_compilation_%: @@ -1250,14 +1239,6 @@ build_tests: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=$(BIN_DIR)/c $(GENERATOR_EXTERNAL_TESTS:$(ROOT_DIR)/test/generator/%_jittest.cpp=$(BIN_DIR)/generator_jit_%) \ $(AUTO_SCHEDULE_TESTS:$(ROOT_DIR)/test/auto_schedule/%.cpp=$(BIN_DIR)/auto_schedule_%) -# OpenGL doesn't build on every host platform we support (eg. ARM). 
-.PHONY: build_opengl_tests -build_opengl_tests: $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=$(BIN_DIR)/opengl_%) - -ifneq ($(WITH_OPENGL),) -build_tests: build_opengl_tests -endif - clean_generator: rm -rf $(BIN_DIR)/*.generator rm -rf $(BIN_DIR)/*/runtime.a @@ -1321,9 +1302,6 @@ $(BIN_DIR)/error_%: $(ROOT_DIR)/test/error/%.cpp $(BIN_DIR)/libHalide.$(SHARED_E $(BIN_DIR)/warning_%: $(ROOT_DIR)/test/warning/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@ -$(BIN_DIR)/opengl_%: $(ROOT_DIR)/test/opengl/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(INCLUDE_DIR)/HalideRuntime.h $(INCLUDE_DIR)/HalideRuntimeOpenGL.h - $(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) -I$(SRC_DIR) $(TEST_LD_FLAGS) $(OPENGL_LD_FLAGS) -o $@ -MMD -MF $(BUILD_DIR)/test_opengl_$*.d - # Auto schedule tests that link against libHalide $(BIN_DIR)/auto_schedule_%: $(ROOT_DIR)/test/auto_schedule/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@ @@ -1874,11 +1852,6 @@ warning_%: $(BIN_DIR)/warning_% cd $(TMP_DIR) ; $(CURDIR)/$< 2>&1 | egrep --q "^Warning" @-echo -opengl_%: $(BIN_DIR)/opengl_% - @-mkdir -p $(TMP_DIR) - cd $(TMP_DIR) ; $(CURDIR)/$< 2>&1 - @-echo - generator_jit_%: $(BIN_DIR)/generator_jit_% @-mkdir -p $(TMP_DIR) cd $(TMP_DIR) ; $(CURDIR)/$< @@ -1928,9 +1901,6 @@ time_compilation_test_%: $(BIN_DIR)/test_% time_compilation_performance_%: $(BIN_DIR)/performance_% $(TIME_COMPILATION) compile_times_performance.csv make -f $(THIS_MAKEFILE) $(@:time_compilation_performance_%=performance_%) -time_compilation_opengl_%: $(BIN_DIR)/opengl_% - $(TIME_COMPILATION) compile_times_opengl.csv make -f $(THIS_MAKEFILE) $(@:time_compilation_opengl_%=opengl_%) - time_compilation_generator_%: $(BIN_DIR)/%.generator $(TIME_COMPILATION) 
compile_times_generator.csv make -f $(THIS_MAKEFILE) $(@:time_compilation_generator_%=$(FILTERS_DIR)/%.a) diff --git a/README.md b/README.md index 6fe672968347..b80441df5535 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ currently targets: - CPU architectures: X86, ARM, MIPS, Hexagon, PowerPC - Operating systems: Linux, Windows, Mac OS X, Android, iOS, Qualcomm QuRT -- GPU Compute APIs: CUDA, OpenCL, OpenGL, OpenGL Compute Shaders, Apple Metal, +- GPU Compute APIs: CUDA, OpenCL, OpenGL Compute Shaders, Apple Metal, Microsoft Direct X 12 Rather than being a standalone programming language, Halide is embedded in C++. @@ -336,140 +336,7 @@ an older XCode which does not default to libc++. # Halide OpenGL/GLSL backend -Halide's OpenGL backend offloads image processing operations to the GPU by -generating GLSL-based fragment shaders. - -Compared to other GPU-based processing options such as CUDA and OpenCL, OpenGL -has two main advantages: it is available on basically every desktop computer and -mobile device, and it is generally well supported across different hardware -vendors. - -The main disadvantage of OpenGL as an image processing framework is that the -computational capabilities of fragment shaders are quite restricted. In general, -the processing model provided by OpenGL is most suitable for filters where each -output pixel can be expressed as a simple function of the input pixels. This -covers a wide range of interesting operations like point-wise filters and -convolutions; but a few common image processing operations such as histograms or -recursive filters are notoriously hard to express in GLSL. - -#### Writing OpenGL-Based Filters - -To enable code generation for OpenGL, include `opengl` in the target specifier -passed to Halide. Since OpenGL shaders are limited in their computational power, -you must also specify a CPU target for those parts of the filter that cannot or -should not be computed on the GPU. 
Examples of valid target specifiers are - -``` -host-opengl -x86-opengl-debug -``` - -Adding `debug`, as in the second example, adds additional logging output and is -highly recommended during development. - -By default, filters compiled for OpenGL targets run completely on the CPU. -Execution on the GPU must be enabled for individual Funcs by appropriate -scheduling calls. - -GLSL fragment shaders implicitly iterate over two spatial dimensions x,y and the -color channel. Due to the way color channels handled in GLSL, only filters for -which the color index is a compile-time constant can be scheduled. The main -consequence is that the range of color variables must be explicitly specified -for both input and output buffers before scheduling: - -``` -ImageParam input; -Func f; -Var x, y, c; -f(x, y, c) = ...; - -input.set_bounds(2, 0, 3); // specify color range for input -f.bound(c, 0, 3); // and output -f.glsl(x, y, c); -``` - -#### JIT Compilation - -For JIT compilation Halide attempts to load the system libraries for opengl and -creates a new context to use for each module. Windows is not yet supported. - -Examples for JIT execution of OpenGL-based filters can be found in test/opengl. - -#### AOT Compilation - -When AOT (ahead-of-time) compilation is used, Halide generates OpenGL-enabled -object files that can be linked to and called from a host application. In -general, this is fairly straightforward, but a few things must be taken care of. - -On Linux, OS X, and Android, Halide creates its own OpenGL context unless the -current thread already has an active context. On other platforms you have to -link implementations of the following two functions with your Halide code: - -``` -extern "C" int halide_opengl_create_context(void *) { - return 0; // if successful -} - -extern "C" void *halide_opengl_get_proc_addr(void *, const char *name) { - ... -} -``` - -Halide allocates and deletes textures as necessary. 
Applications may manage the -textures by hand by setting the `halide_buffer_t::device` field; this is most -useful for reusing image data that is already stored in textures. Some -rudimentary checks are performed to ensure that externally allocated textures -have the correct format, but in general that's the responsibility of the -application. - -It is possible to let render directly to the current framebuffer; to do this, -set the `dev` field of the output buffer to the value returned by -`halide_opengl_output_client_bound`. The example in apps/HelloAndroidGL -demonstrates this technique. - -Some operating systems can delete the OpenGL context of suspended applications. -If this happens, Halide needs to re-initialize itself with the new context after -the application resumes. Call `halide_opengl_context_lost` to reset Halide's -OpenGL state after this has happened. - -#### Limitations - -The current implementation of the OpenGL backend targets the common subset of -OpenGL 2.0 and OpenGL ES 2.0 which is widely available on both mobile devices -and traditional computers. As a consequence, only a subset of the Halide -language can be scheduled to run using OpenGL. Some important limitations are: - -- Reductions cannot be implemented in GLSL and must be run on the CPU. - -- OpenGL ES 2.0 only supports uint8 buffers. - - Support for floating point texture is available, but requires OpenGL (ES) 3.0 - or the texture_float extension, which may not work on all mobile devices. - -- OpenGL ES 2.0 has very limited support for integer arithmetic. For maximum - compatibility, consider doing all computations using floating point, even when - using integer textures. - -- Only 2D images with 3 or 4 color channels can be scheduled. Images with one or - two channels require OpenGL (ES) 3.0 or the texture_rg extension. 
- -- Not all builtin functions provided by Halide are currently supported, for - example `fast_log`, `fast_exp`, `fast_pow`, `reinterpret`, bit operations, - `random_float`, `random_int` cannot be used in GLSL code. - -The maximum texture size in OpenGL is `GL_MAX_TEXTURE_SIZE`, which is often -smaller than the image of interest; on mobile devices, for example, -`GL_MAX_TEXTURE_SIZE` is commonly 2048. Tiling must be used to process larger -images. - -Planned features: - -- Support for half-float textures and arithmetic - -- Support for integer textures and arithmetic - -(Note that OpenGL Compute Shaders are supported with a separate OpenGLCompute -backend.) +TODO(https://github.com/halide/Halide/issues/5633): update this for OpenGLCompute, which is staying # Halide for Hexagon HVX diff --git a/README_cmake.md b/README_cmake.md index d078c41775b0..421d93007bee 100644 --- a/README_cmake.md +++ b/README_cmake.md @@ -392,7 +392,6 @@ apply when `WITH_TESTS=ON`: | `WITH_TEST_ERROR` | `ON` | enable the expected-error tests | | `WITH_TEST_WARNING` | `ON` | enable the expected-warning tests | | `WITH_TEST_PERFORMANCE` | `ON` | enable performance testing | -| `WITH_TEST_OPENGL` | `OFF` | enable the OpenGL tests | | `WITH_TEST_GENERATOR` | `ON` | enable the AOT generator tests | The following options enable/disable various LLVM backends (they correspond to @@ -416,7 +415,6 @@ The following options enable/disable various Halide-specific backends: | Option | Default | Description | | --------------------- | ------- | -------------------------------------- | | `TARGET_OPENCL` | `ON` | Enable the OpenCL-C backend | -| `TARGET_OPENGL` | `ON` | Enable the OpenGL/GLSL backend | | `TARGET_METAL` | `ON` | Enable the Metal backend | | `TARGET_D3D12COMPUTE` | `ON` | Enable the Direct3D 12 Compute backend | @@ -466,6 +464,8 @@ If the CMake version is lower than 3.18, the deprecated [`FindCUDA`][findcuda] module will be used instead. 
It reads the variable `CUDA_TOOLKIT_ROOT_DIR` instead of `CUDAToolkit_ROOT` above. +TODO(https://github.com/halide/Halide/issues/5633): update this section for OpenGLCompute, which needs some (but maybe not all) of this. + When targeting OpenGL, the [`FindOpenGL`][findopengl] and [`FindX11`][findx11] modules will be used to link AOT generated binaries. These modules can be overridden by setting the following variables: diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 7e0ed08e4763..3effa0221125 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -9,7 +9,6 @@ enable_testing() # add_subdirectory(HelloAndroid) # TODO(#5374): missing CMake build # add_subdirectory(HelloAndroidCamera2) # TODO(#5374): missing CMake build -# add_subdirectory(HelloAndroidGL) # TODO(#5374): missing CMake build # add_subdirectory(HelloMatlab) # TODO(#5374): missing CMake build # add_subdirectory(HelloPyTorch) # TODO(#5374): missing CMake build # add_subdirectory(HelloWasm) # TODO(#5374): missing CMake build @@ -24,7 +23,6 @@ add_subdirectory(conv_layer) add_subdirectory(cuda_mat_mul) add_subdirectory(depthwise_separable_conv) add_subdirectory(fft) -add_subdirectory(glsl) add_subdirectory(harris) # add_subdirectory(hexagon_benchmarks) # TODO(#5374): missing CMake build # add_subdirectory(hexagon_dma) # TODO(#5374): missing CMake build @@ -39,7 +37,6 @@ add_subdirectory(max_filter) add_subdirectory(nl_means) # add_subdirectory(nn_ops) # TODO(#5374): missing CMake build # add_subdirectory(onnx) # TODO(#5374): missing CMake build -# add_subdirectory(opengl_demo) # TODO(#5374): missing CMake build # add_subdirectory(openglcompute) # TODO(#5374): missing CMake build add_subdirectory(resize) # add_subdirectory(resnet_50) # TODO(#5374): missing CMake build diff --git a/apps/HelloAndroidGL/AndroidManifest.xml b/apps/HelloAndroidGL/AndroidManifest.xml deleted file mode 100644 index de292f319f7f..000000000000 --- a/apps/HelloAndroidGL/AndroidManifest.xml +++ /dev/null @@ -1,21 +0,0 
@@ - - - - - - - - - - - - - - - diff --git a/apps/HelloAndroidGL/ant.properties b/apps/HelloAndroidGL/ant.properties deleted file mode 100644 index b0971e891efd..000000000000 --- a/apps/HelloAndroidGL/ant.properties +++ /dev/null @@ -1,17 +0,0 @@ -# This file is used to override default values used by the Ant build system. -# -# This file must be checked into Version Control Systems, as it is -# integral to the build system of your project. - -# This file is only used by the Ant script. - -# You can use this to override default values such as -# 'source.dir' for the location of your java source folder and -# 'out.dir' for the location of your output folder. - -# You can also use it define how the release builds are signed by declaring -# the following properties: -# 'key.store' for the location of your keystore and -# 'key.alias' for the name of the key to use. -# The password will be asked during the build when you use the 'release' target. - diff --git a/apps/HelloAndroidGL/build.sh b/apps/HelloAndroidGL/build.sh deleted file mode 100755 index d9b1f395dc12..000000000000 --- a/apps/HelloAndroidGL/build.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -set -e -android update project -p . --target android-17 -cd jni -c++ -std=c++11 halide_gl_filter.cpp -L ../../../bin -lHalide -I ../../../include -ldl -lpthread -lz -HL_TARGET=arm-32-android-opengl-debug DYLD_LIBRARY_PATH=../../../bin LD_LIBRARY_PATH=../../../bin ./a.out -cd .. 
-pwd -ndk-build -ant debug -adb install -r bin/HelloAndroidGL-debug.apk -adb logcat diff --git a/apps/HelloAndroidGL/build.xml b/apps/HelloAndroidGL/build.xml deleted file mode 100644 index 1e79c7ee52fa..000000000000 --- a/apps/HelloAndroidGL/build.xml +++ /dev/null @@ -1,92 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/apps/HelloAndroidGL/jni/Android.mk b/apps/HelloAndroidGL/jni/Android.mk deleted file mode 100644 index c30cec7bf54b..000000000000 --- a/apps/HelloAndroidGL/jni/Android.mk +++ /dev/null @@ -1,15 +0,0 @@ -LOCAL_PATH := $(call my-dir) - -include $(CLEAR_VARS) - -LOCAL_MODULE := android_halide_gl_native -LOCAL_ARM_MODE := arm -LOCAL_SRC_FILES := android_halide_gl_native.cpp -LOCAL_LDFLAGS := -Ljni -LOCAL_LDLIBS := -lm -llog -landroid -lEGL -lGLESv2 jni/halide_gl_filter.o -LOCAL_STATIC_LIBRARIES := android_native_app_glue -LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../include - -include $(BUILD_SHARED_LIBRARY) - -$(call import-module,android/native_app_glue) diff --git a/apps/HelloAndroidGL/jni/Application.mk b/apps/HelloAndroidGL/jni/Application.mk deleted file mode 100644 index 56005dabf161..000000000000 --- a/apps/HelloAndroidGL/jni/Application.mk +++ /dev/null @@ -1,3 +0,0 @@ -# The ARMv7 is significanly faster due to the use of the hardware FPU -APP_ABI := armeabi-v7a -APP_PLATFORM := android-17 diff --git a/apps/HelloAndroidGL/jni/android_halide_gl_native.cpp b/apps/HelloAndroidGL/jni/android_halide_gl_native.cpp deleted file mode 100644 index 20e3de6bfe56..000000000000 --- a/apps/HelloAndroidGL/jni/android_halide_gl_native.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include "HalideBuffer.h" -#include "HalideRuntimeOpenGL.h" -#include "halide_gl_filter.h" - -#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, "halide_native", __VA_ARGS__) -#define LOGE(...) 
__android_log_print(ANDROID_LOG_ERROR, "halide_native", __VA_ARGS__) - -void *const user_context = NULL; - -extern "C" JNIEXPORT void JNICALL Java_org_halide_1lang_hellohalidegl_HalideGLView_processTextureHalide( - JNIEnv *env, jobject obj, jint dst, jint width, jint height) { - - auto dstBuf = Halide::Runtime::Buffer::make_interleaved(NULL, width, height, 4); - // If dst == 0, let Halide render directly to the current render target. - if (dst == 0) { - int result = halide_opengl_wrap_render_target(user_context, dstBuf); - if (result != 0) { - halide_error(user_context, "halide_opengl_wrap_render_target failed"); - } - } else { - int result = halide_opengl_wrap_texture(user_context, dstBuf, dst); - if (result != 0) { - halide_error(user_context, "halide_opengl_wrap_texture failed"); - } - } - - static float time = 0.0f; - if (int err = halide_gl_filter(time, dstBuf)) { - LOGD("Halide filter failed with error code %d\n", err); - } - time += 1.0f / 16.0f; - - uintptr_t detached = halide_opengl_detach_texture(user_context, dstBuf); - if (detached != dst) { - halide_error(user_context, "halide_opengl_detach_texture failed"); - } -} - -extern "C" JNIEXPORT void JNICALL Java_org_halide_1lang_hellohalidegl_HalideGLView_halideContextLost( - JNIEnv *env, jobject obj) { - - halide_opengl_context_lost(NULL); -} diff --git a/apps/HelloAndroidGL/jni/halide_gl_filter.cpp b/apps/HelloAndroidGL/jni/halide_gl_filter.cpp deleted file mode 100644 index 15e949312539..000000000000 --- a/apps/HelloAndroidGL/jni/halide_gl_filter.cpp +++ /dev/null @@ -1,40 +0,0 @@ -#include "Halide.h" - -using namespace Halide; - -int main(int argc, char **argv) { - Param time; - - const float pi = 3.1415926536; - - Var x, y, c; - Func result; - - Expr kx, ky; - Expr xx, yy; - kx = x / 150.0f; - ky = y / 150.0f; - - xx = kx + sin(time / 3.0f); - yy = ky + sin(time / 2.0f); - - Expr angle; - angle = 2 * pi * sin(time / 20.0f); - kx = kx * cos(angle) - ky * sin(angle); - ky = kx * sin(angle) + ky * 
cos(angle); - - Expr v = 0.0f; - v += sin((ky + time) / 2.0f); - v += sin((kx + ky + time) / 2.0f); - v += sin(sqrt(xx * xx + yy * yy + 1.0f) + time); - - result(x, y, c) = cast(selecy_by_index(c, {32, cos(pi * v), sin(pi * v)}) * 80 + (255 - 80)); - - result.output_buffer().set_stride(0, 4); - result.bound(c, 0, 4); - result.glsl(x, y, c); - - result.compile_to_file("halide_gl_filter", {time}, "halide_gl_filter"); - - return 0; -} diff --git a/apps/HelloAndroidGL/project.properties b/apps/HelloAndroidGL/project.properties deleted file mode 100644 index a3ee5ab64f5e..000000000000 --- a/apps/HelloAndroidGL/project.properties +++ /dev/null @@ -1,14 +0,0 @@ -# This file is automatically generated by Android Tools. -# Do not modify this file -- YOUR CHANGES WILL BE ERASED! -# -# This file must be checked in Version Control Systems. -# -# To customize properties used by the Ant build system edit -# "ant.properties", and override values to adapt the script to your -# project structure. -# -# To enable ProGuard to shrink and obfuscate your code, uncomment this (available properties: sdk.dir, user.home): -#proguard.config=${sdk.dir}/tools/proguard/proguard-android.txt:proguard-project.txt - -# Project target. 
-target=android-17 diff --git a/apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png b/apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png deleted file mode 100644 index 96a442e5b8e9..000000000000 Binary files a/apps/HelloAndroidGL/res/drawable-hdpi/ic_launcher.png and /dev/null differ diff --git a/apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png b/apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png deleted file mode 100644 index 99238729d875..000000000000 Binary files a/apps/HelloAndroidGL/res/drawable-ldpi/ic_launcher.png and /dev/null differ diff --git a/apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png b/apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png deleted file mode 100644 index 359047dfa4ed..000000000000 Binary files a/apps/HelloAndroidGL/res/drawable-mdpi/ic_launcher.png and /dev/null differ diff --git a/apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png b/apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png deleted file mode 100644 index 71c6d760f051..000000000000 Binary files a/apps/HelloAndroidGL/res/drawable-xhdpi/ic_launcher.png and /dev/null differ diff --git a/apps/HelloAndroidGL/res/layout/main.xml b/apps/HelloAndroidGL/res/layout/main.xml deleted file mode 100644 index 5a8da6d73556..000000000000 --- a/apps/HelloAndroidGL/res/layout/main.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/apps/HelloAndroidGL/res/values/strings.xml b/apps/HelloAndroidGL/res/values/strings.xml deleted file mode 100644 index 2673566b97f0..000000000000 --- a/apps/HelloAndroidGL/res/values/strings.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - Halide GL Demo - diff --git a/apps/HelloAndroidGL/src/org/halide_lang/hellohalidegl/HelloHalideGL.java b/apps/HelloAndroidGL/src/org/halide_lang/hellohalidegl/HelloHalideGL.java deleted file mode 100644 index 78843d3d498f..000000000000 --- a/apps/HelloAndroidGL/src/org/halide_lang/hellohalidegl/HelloHalideGL.java +++ /dev/null @@ -1,208 +0,0 @@ -package 
org.halide_lang.hellohalidegl; - -import android.app.Activity; -import android.content.Context; -import android.os.Bundle; -import android.hardware.Camera; -import android.util.Log; -import android.widget.FrameLayout; -import android.view.SurfaceView; -import android.view.Surface; -import android.graphics.Bitmap; -import android.graphics.Canvas; -import android.opengl.GLSurfaceView; -import javax.microedition.khronos.egl.EGLConfig; -import javax.microedition.khronos.opengles.GL10; -import java.nio.ByteBuffer; -import java.nio.FloatBuffer; -import java.nio.ByteOrder; - -class HalideGLView extends GLSurfaceView { - static { - System.loadLibrary("android_halide_gl_native"); - } - private static native void processTextureHalide(int dst, int width, int height); - private static native void halideContextLost(); - - private static final android.opengl.GLES20 gl = new android.opengl.GLES20(); - - // If set to true, let Halide render directly to the framebuffer. - // Otherwise, Halide renders to a texture which we then blit to the - // screen. 
- private boolean halideDirectRender = true; - - HalideGLView(Context context) { - super(context); - setEGLContextClientVersion(2); - setPreserveEGLContextOnPause(true); - setDebugFlags(DEBUG_CHECK_GL_ERROR); - setRenderer(new MyRenderer()); - } - - class MyRenderer implements GLSurfaceView.Renderer { - private int output; - private int surfaceWidth, surfaceHeight; - private int program; - - private FloatBuffer quad_vertices; - - final String vs_source = - "attribute vec2 position;\n" + - "varying vec2 texpos;\n" + - "void main(void) {\n" + - " gl_Position = vec4(position, 0.0, 1.0);\n" + - " texpos = position * 0.5 + 0.5;\n" + - "}\n"; - final String fs_source = - "uniform sampler2D tex;\n" + - "varying highp vec2 texpos;\n" + - "void main(void) {\n" + - " gl_FragColor = texture2D(tex, texpos.xy);\n" + - "}\n"; - - public MyRenderer() { - final float[] vertices = new float[] { - -1.0f, -1.0f, - 1.0f, -1.0f, - -1.0f, 1.0f, - 1.0f, 1.0f, - }; - quad_vertices = - ByteBuffer.allocateDirect(4 * vertices.length) - .order(ByteOrder.nativeOrder()) - .asFloatBuffer(); - quad_vertices.put(vertices); - } - - /** Compile a single vertex or fragment shader. */ - private int compileShader(int type, String source) { - int shader = gl.glCreateShader(type); - gl.glShaderSource(shader, source); - gl.glCompileShader(shader); - int[] status = new int[1]; - gl.glGetShaderiv(shader, gl.GL_COMPILE_STATUS, status, 0); - if (status[0] == 0) { - String log = gl.glGetShaderInfoLog(shader); - Log.e(HelloHalideGL.TAG, log); - throw new RuntimeException("Compiling shader failed"); - } - return shader; - } - - /** Compile and link simple vertex and fragment shader for rendering - * 2D graphics. 
*/ - private void prepareShaders() { - int vertex_shader = compileShader(gl.GL_VERTEX_SHADER, - vs_source); - int fragment_shader = compileShader(gl.GL_FRAGMENT_SHADER, - fs_source); - - program = gl.glCreateProgram(); - if (program == 0) { - throw new RuntimeException("Invalid GLSL program"); - } - gl.glAttachShader(program, vertex_shader); - gl.glAttachShader(program, fragment_shader); - gl.glBindAttribLocation(program, 0, "position"); - gl.glLinkProgram(program); - - int[] status = new int[1]; - gl.glGetProgramiv(program, gl.GL_LINK_STATUS, status, 0); - if (status[0] == 0) { - String log = gl.glGetProgramInfoLog(program); - Log.e(HelloHalideGL.TAG, log); - throw new RuntimeException("Linking GLSL program failed"); - } - } - - private int createTexture(int w, int h) { - int[] id = new int[1]; - gl.glGenTextures(1, id, 0); - gl.glBindTexture(gl.GL_TEXTURE_2D, id[0]); - gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_NEAREST); - gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST); - - ByteBuffer buf = ByteBuffer.allocate(w * h * 4); - gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA, w, h, 0, - gl.GL_RGBA, gl.GL_UNSIGNED_BYTE, buf); - return id[0]; - } - - @Override - public void onSurfaceCreated(GL10 unused, EGLConfig config) { - Log.d("Hello", "onSurfaceCreated"); - prepareShaders(); - } - - @Override - public void onSurfaceChanged(GL10 unused, int w, int h) { - halideContextLost(); - int[] textures = { output }; - gl.glDeleteTextures(1, textures, 0); - output = createTexture(w, h); - surfaceWidth = w; - surfaceHeight = h; - } - - @Override - public void onDrawFrame(GL10 unused) { - Log.d("Hello", "onDrawFrame"); - - if (halideDirectRender) { - // Call Halide filter; 0 as the texture ID in this case - // indicates render to framebuffer. 
- processTextureHalide(0, surfaceWidth, surfaceHeight); - } else { - // Call Halide filter - processTextureHalide(output, surfaceWidth, surfaceHeight); - - // Draw result to screen - gl.glViewport(0, 0, surfaceWidth, surfaceHeight); - - gl.glUseProgram(program); - - int positionLoc = gl.glGetAttribLocation(program, "position"); - quad_vertices.position(0); - gl.glVertexAttribPointer(positionLoc, 2, gl.GL_FLOAT, false, 0, quad_vertices); - gl.glEnableVertexAttribArray(positionLoc); - - int texLoc = gl.glGetUniformLocation(program, "tex"); - gl.glUniform1i(texLoc, 0); - gl.glActiveTexture(gl.GL_TEXTURE0); - gl.glBindTexture(gl.GL_TEXTURE_2D, output); - - gl.glDrawArrays(gl.GL_TRIANGLE_STRIP, 0, 4); - - gl.glDisableVertexAttribArray(positionLoc); - gl.glBindTexture(gl.GL_TEXTURE_2D, 0); - gl.glUseProgram(0); - gl.glDisableVertexAttribArray(0); - } - } - } -} - -public class HelloHalideGL extends Activity { - static final String TAG = "HelloHalideGL"; - - private GLSurfaceView view; - - @Override - public void onCreate(Bundle b) { - super.onCreate(b); - view = new HalideGLView(this); - setContentView(view); - } - - @Override - public void onResume() { - super.onResume(); - view.onResume(); - } - - @Override - public void onPause() { - super.onPause(); - view.onPause(); - } -} diff --git a/apps/bgu/Makefile b/apps/bgu/Makefile index a05070a3039c..ab8f69baaae5 100644 --- a/apps/bgu/Makefile +++ b/apps/bgu/Makefile @@ -24,7 +24,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/bgu.generator $(BIN)/%/filter: filter.cpp $(BIN)/%/bgu.a $(BIN)/%/bgu_auto_schedule.a $(BIN)/%/runtime.a @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS) + $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(BIN)/%/out.png: $(BIN)/%/filter $< ../images/rgb.png $(BIN)/$*/out.png diff --git a/apps/glsl/CMakeLists.txt b/apps/glsl/CMakeLists.txt 
deleted file mode 100644 index e9a8a5f13765..000000000000 --- a/apps/glsl/CMakeLists.txt +++ /dev/null @@ -1,41 +0,0 @@ -if (WIN32) - # Halide OpenGL is broken on Windows. - return() -endif () - -cmake_minimum_required(VERSION 3.16) -project(glsl) - -enable_testing() - -# Set up language settings -set(CMAKE_CXX_STANDARD 11) -set(CMAKE_CXX_STANDARD_REQUIRED YES) -set(CMAKE_CXX_EXTENSIONS NO) - -# Find Halide -find_package(Halide REQUIRED) - -find_package(OpenGL REQUIRED) -set(opengl_features opengl) -if (TARGET OpenGL::OpenGL AND TARGET OpenGL::EGL) - # EGL requires GLVND (which is found iff ::OpenGL is present) - list(APPEND opengl_features egl) -endif () - -# Generators -add_executable(glsl_blur.generator halide_blur_glsl_generator.cpp) -target_link_libraries(glsl_blur.generator PRIVATE Halide::Generator) - -add_executable(ycc.generator halide_ycc_glsl_generator.cpp) -target_link_libraries(ycc.generator PRIVATE Halide::Generator) - -# Libraries -add_halide_library(halide_blur_glsl FROM glsl_blur.generator FEATURES ${opengl_features} debug) -add_halide_library(halide_ycc_glsl FROM ycc.generator FEATURES ${opengl_features} debug) - -# Final executable -add_executable(opengl_test opengl_test.cpp) -target_link_libraries(opengl_test PRIVATE halide_blur_glsl halide_ycc_glsl) - -add_test(NAME opengl_test COMMAND opengl_test) diff --git a/apps/glsl/Makefile b/apps/glsl/Makefile deleted file mode 100644 index dc12d94ae504..000000000000 --- a/apps/glsl/Makefile +++ /dev/null @@ -1,36 +0,0 @@ -include ../support/Makefile.inc - -# Note: using the -g flag in conjunction with the -debug Feature on OSX may -# produce "failed to insert symbol" warnings at link time; this is annoying but harmless. 
-CXXFLAGS += -g -O0 - -all: $(BIN)/$(HL_TARGET)/opengl_test - -$(GENERATOR_BIN)/halide_blur_glsl.generator: halide_blur_glsl_generator.cpp $(GENERATOR_DEPS) - @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) $(filter %.cpp,$^) -o $@ $(LIBHALIDE_LDFLAGS) - -$(BIN)/%/halide_blur_glsl.a: $(GENERATOR_BIN)/halide_blur_glsl.generator - @mkdir -p $(@D) - $^ -g halide_blur_glsl -e $(GENERATOR_OUTPUTS) -o $(@D) target=$*-opengl-debug - -$(GENERATOR_BIN)/halide_ycc_glsl.generator: halide_ycc_glsl_generator.cpp $(GENERATOR_DEPS) - @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) $(filter %.cpp,$^) -o $@ $(LIBHALIDE_LDFLAGS) - -$(BIN)/%/halide_ycc_glsl.a: $(GENERATOR_BIN)/halide_ycc_glsl.generator - @mkdir -p $(@D) - $^ -g halide_ycc_glsl -e $(GENERATOR_OUTPUTS) -o $(@D) target=$*-opengl-debug - -$(BIN)/%/opengl_test: opengl_test.cpp $(BIN)/%/halide_blur_glsl.a $(BIN)/%/halide_ycc_glsl.a - @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -I$(BIN)/$* $^ -o $@ $(LDFLAGS) -L$(TOP)/bin $(PLATFORM_OPENGL_LDFLAGS) - -run: $(BIN)/$(HL_TARGET)/opengl_test - LD_LIBRARY_PATH=../../bin $< - -test: run - -.PHONY: clean -clean: - rm -rf $(BIN) diff --git a/apps/glsl/halide_blur_glsl_generator.cpp b/apps/glsl/halide_blur_glsl_generator.cpp deleted file mode 100644 index 1d9a2eae47dc..000000000000 --- a/apps/glsl/halide_blur_glsl_generator.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "Halide.h" - -namespace { - -class HalideBlurGLSL : public Halide::Generator { -public: - Input> input8{"input8", 3}; - Output> blur_filter{"blur_filter", 3}; - void generate() { - assert(get_target().has_feature(Target::OpenGL)); - - Func blur_x("blur_x"), blur_y("blur_y"); - Var x("x"), y("y"), c("c"); - - // The algorithm - Func input; - input(x, y, c) = cast(input8(clamp(x, input8.dim(0).min(), input8.dim(0).max()), - clamp(y, input8.dim(1).min(), input8.dim(1).max()), c)) / - 255.f; - blur_x(x, y, c) = (input(x, y, c) + input(x + 1, y, c) + input(x + 2, y, c)) / 3; - blur_y(x, y, c) = (blur_x(x, y, c) + blur_x(x, y + 1, c) + blur_x(x, y + 2, 
c)) / 3; - blur_filter(x, y, c) = cast(blur_y(x, y, c) * 255.f); - - // Schedule for GLSL - input8.dim(2).set_bounds(0, 3); - blur_filter.bound(c, 0, 3); - blur_filter.glsl(x, y, c); - } -}; - -} // namespace - -HALIDE_REGISTER_GENERATOR(HalideBlurGLSL, halide_blur_glsl) diff --git a/apps/glsl/halide_ycc_glsl_generator.cpp b/apps/glsl/halide_ycc_glsl_generator.cpp deleted file mode 100644 index 47c05e8f4f3a..000000000000 --- a/apps/glsl/halide_ycc_glsl_generator.cpp +++ /dev/null @@ -1,39 +0,0 @@ -#include "Halide.h" - -namespace { - -class RgbToYcc : public Halide::Generator { -public: - Input> input8{"input8", 3}; - Output> out{"out", 3}; - void generate() { - assert(get_target().has_feature(Target::OpenGL)); - Var x("x"), y("y"), c("c"); - - // The algorithm - Func input("input"); - input(x, y, c) = cast(input8(x, y, c)) / 255.0f; - - Func Y("Y"), Cb("Cb"), Cr("Cr"); - Y(x, y) = 16.f / 255.f + (0.257f * input(x, y, 0) + - 0.504f * input(x, y, 1) + - 0.098f * input(x, y, 2)); - Cb(x, y) = 128.f / 255.f + (0.439f * input(x, y, 0) + - -0.368f * input(x, y, 1) + - -0.071f * input(x, y, 2)); - Cr(x, y) = 128.f / 255.f + (-0.148f * input(x, y, 0) + - -0.291f * input(x, y, 1) + - 0.439f * input(x, y, 2)); - out(x, y, c) = cast( - mux(c, {Y(x, y), Cb(x, y), Cr(x, y), 0.0f}) * 255.f); - - // Schedule for GLSL - input8.dim(2).set_bounds(0, 3); - out.bound(c, 0, 3); - out.glsl(x, y, c); - } -}; - -} // namespace - -HALIDE_REGISTER_GENERATOR(RgbToYcc, halide_ycc_glsl) diff --git a/apps/glsl/opengl_test.cpp b/apps/glsl/opengl_test.cpp deleted file mode 100644 index 161805887daa..000000000000 --- a/apps/glsl/opengl_test.cpp +++ /dev/null @@ -1,58 +0,0 @@ -#include -#include -#include - -#include "HalideBuffer.h" -#include "HalideRuntime.h" -#include "HalideRuntimeOpenGL.h" - -using Halide::Runtime::Buffer; - -#include "halide_blur_glsl.h" -#include "halide_ycc_glsl.h" - -void test_blur() { - const int W = 12, H = 32, C = 3; - Buffer input(W, H, C); - Buffer output(W, H, C); - 
- fprintf(stderr, "test_blur\n"); - halide_blur_glsl(input, output); - fprintf(stderr, "test_blur complete\n"); -} - -void test_ycc() { - const int W = 12, H = 32, C = 3; - Buffer input(W, H, C); - Buffer output(W, H, C); - - fprintf(stderr, "test_ycc\n"); - halide_ycc_glsl(input, output); - fprintf(stderr, "Ycc complete\n"); -} - -void test_device_sync() { - const int W = 12, H = 32, C = 3; - Buffer temp(W, H, C); - - temp.set_host_dirty(); - int result = temp.copy_to_device(halide_opengl_device_interface()); - if (result != 0) { - fprintf(stderr, "halide_device_malloc failed with return %d.\n", result); - abort(); - } else { - result = temp.device_sync(); - if (result != 0) { - fprintf(stderr, "halide_device_sync failed with return %d.\n", result); - abort(); - } else { - fprintf(stderr, "Test device sync complete.\n"); - } - } -} - -int main(int argc, char *argv[]) { - test_blur(); - test_ycc(); - test_device_sync(); -} diff --git a/apps/harris/Makefile b/apps/harris/Makefile index 3fef49815a1c..713c11d0c2c7 100644 --- a/apps/harris/Makefile +++ b/apps/harris/Makefile @@ -22,7 +22,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/harris.generator $(BIN)/%/filter: filter.cpp $(BIN)/%/harris.a $(BIN)/%/harris_auto_schedule.a $(BIN)/%/runtime.a @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS) + $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(BIN)/%/out.png: $(BIN)/%/filter $< ../images/rgba.png $(BIN)/$*/out.png diff --git a/apps/hist/Makefile b/apps/hist/Makefile index 8ab3e5785407..5f4faa1b835a 100644 --- a/apps/hist/Makefile +++ b/apps/hist/Makefile @@ -24,7 +24,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/hist.generator $(BIN)/%/filter: filter.cpp $(BIN)/%/hist.a $(BIN)/%/hist_auto_schedule.a $(BIN)/%/runtime.a @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) 
$(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS) + $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(BIN)/%/out.png: $(BIN)/%/filter $< ../images/rgba.png $(BIN)/$*/out.png diff --git a/apps/iir_blur/Makefile b/apps/iir_blur/Makefile index d195ffa7caf5..8c9983c8fa14 100644 --- a/apps/iir_blur/Makefile +++ b/apps/iir_blur/Makefile @@ -22,7 +22,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/iir_blur.generator $(BIN)/%/filter: filter.cpp $(BIN)/%/iir_blur.a $(BIN)/%/iir_blur_auto_schedule.a $(BIN)/%/runtime.a @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS) + $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(BIN)/%/out.png: $(BIN)/%/filter $< ../images/rgba.png $(BIN)/$*/out.png diff --git a/apps/interpolate/Makefile b/apps/interpolate/Makefile index fc5d8d7609f1..9d0d5c41f434 100644 --- a/apps/interpolate/Makefile +++ b/apps/interpolate/Makefile @@ -24,7 +24,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/interpolate.generator $(BIN)/%/filter: filter.cpp $(BIN)/%/interpolate.a $(BIN)/%/interpolate_auto_schedule.a $(BIN)/%/runtime.a @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS) + $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(BIN)/%/out.png: $(BIN)/%/filter $< ../images/rgba.png $(BIN)/$*/out.png diff --git a/apps/lens_blur/Makefile b/apps/lens_blur/Makefile index ea403c1e5bcf..8ede6b797ffe 100644 --- a/apps/lens_blur/Makefile +++ b/apps/lens_blur/Makefile @@ -19,7 +19,7 @@ $(BIN)/%/lens_blur_auto_schedule.a: $(GENERATOR_BIN)/lens_blur.generator $(BIN)/%/process: process.cpp $(BIN)/%/lens_blur.a $(BIN)/%/lens_blur_auto_schedule.a @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -I$(BIN)/$* 
-Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS) + $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(BIN)/%/out.png: $(BIN)/%/process @mkdir -p $(@D) diff --git a/apps/local_laplacian/Makefile b/apps/local_laplacian/Makefile index e9a9b69fe53f..21fa7bf74f6b 100644 --- a/apps/local_laplacian/Makefile +++ b/apps/local_laplacian/Makefile @@ -18,7 +18,7 @@ $(BIN)/%/local_laplacian_auto_schedule.a: $(GENERATOR_BIN)/local_laplacian.gener $(BIN)/%/process: process.cpp $(BIN)/%/local_laplacian.a $(BIN)/%/local_laplacian_auto_schedule.a @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS) + $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(BIN)/%/out.png: $(BIN)/%/process @mkdir -p $(@D) @@ -30,7 +30,7 @@ $(BIN)/%/out.tiff: $(BIN)/%/process $(BIN)/%/process_viz: process.cpp $(BIN)/%-trace_all/local_laplacian.a @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -DNO_AUTO_SCHEDULE -I$(BIN)/$*-trace_all -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS) + $(CXX) $(CXXFLAGS) -DNO_AUTO_SCHEDULE -I$(BIN)/$*-trace_all -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) ../../bin/HalideTraceViz: ../../util/HalideTraceViz.cpp $(MAKE) -C ../../ bin/HalideTraceViz diff --git a/apps/max_filter/Makefile b/apps/max_filter/Makefile index 6fcd9a59748c..bd755774b2f5 100644 --- a/apps/max_filter/Makefile +++ b/apps/max_filter/Makefile @@ -24,7 +24,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/max_filter.generator $(BIN)/%/filter: filter.cpp $(BIN)/%/max_filter.a $(BIN)/%/max_filter_auto_schedule.a $(BIN)/%/runtime.a @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS) + $(CXX) $(CXXFLAGS) 
-I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(BIN)/%/out.png: $(BIN)/%/filter $< ../images/rgba.png $(BIN)/$*/out.png diff --git a/apps/nl_means/Makefile b/apps/nl_means/Makefile index 13c9290cd3a2..2c7fecdccc47 100644 --- a/apps/nl_means/Makefile +++ b/apps/nl_means/Makefile @@ -18,7 +18,7 @@ $(BIN)/%/nl_means_auto_schedule.a: $(GENERATOR_BIN)/nl_means.generator $(BIN)/%/process: process.cpp $(BIN)/%/nl_means.a $(BIN)/%/nl_means_auto_schedule.a @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS) + $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(BIN)/%/out.png: $(BIN)/%/process @mkdir -p $(@D) diff --git a/apps/opengl_demo/Makefile b/apps/opengl_demo/Makefile deleted file mode 100644 index 5d6bc2bb23e7..000000000000 --- a/apps/opengl_demo/Makefile +++ /dev/null @@ -1,102 +0,0 @@ -# -# This could be more DRY using some Makefile magic, but for the example -# app will try to maximize clarity by making most rules explicit -# - -# Where to find Halide. -# -# If you are building this demo using Halide installed systemwide (e.g. on -# OS X installed via homebrew), you can set: -# -# HALIDE_TOOLS_DIR = /usr/local/share/halide/tools -# HALIDE_LIB_PATH = -# HALIDE_INC_PATH = -# -# These settings are for building within the Halide source tree: -HALIDE_TOOLS_DIR = ../../tools -HALIDE_LIB_PATH = -L ../../bin -HALIDE_INC_PATH = -I ../../include -HL_TARGET ?= host - -# Platform-specific settings. 
-# -UNAME = $(shell uname) - -ifeq ($(UNAME),Darwin) - - # These are for OS X: - DTX_FONT = /Library/Fonts/Arial.ttf - OPENGL_LIBS = -lglfw -framework OpenGL -framework GLUT - GENERATOR_LIBS = -lHalide -lz -lcurses - -else - - # These are for Ubuntu Linux - DTX_FONT = /usr/share/fonts/truetype/dejavu/DejaVuSans.ttf - OPENGL_LIBS = `pkg-config glfw3 --libs` -lGL -lglut -lX11 -lpthread -ldl -lXxf86vm -lXinerama -lXcursor -lXrandr - GENERATOR_LIBS = -lHalide -lz -lcurses -Wl,--rpath=$(HALIDE_LIB_PATH) - -endif - -# -# General build settings. Should be good cross-platform. -# -MAIN_LIBS = -lpng -ldrawtext $(OPENGL_LIBS) -GENERATOR_LIBS = -lHalide -lz -lcurses -CXXFLAGS = -std=c++11 -g -DDTX_FONT=\"$(DTX_FONT)\" $(HALIDE_INC_PATH) - -# Output directory. -BIN ?= bin - -.PHONY: run clean - -default: run - -run: $(BIN)/opengl_demo - $(BIN)/opengl_demo image.png - -clean: - rm -rf $(BIN) - -$(BIN)/opengl_demo: \ - $(BIN)/main.o \ - $(BIN)/layout.o \ - $(BIN)/timer.o \ - $(BIN)/glfw_helpers.o \ - $(BIN)/opengl_helpers.o \ - $(BIN)/png_helpers.o \ - $(BIN)/sample_filter_cpu.o \ - $(BIN)/sample_filter_opengl.o - $(CXX) $(CXXFLAGS) -o $@ $^ $(MAIN_LIBS) - -# -# Explicitly list the dependency on the generated filter header files, -# to ensure that they are created first. 
-# -$(BIN)/main.o: \ - $(BIN)/sample_filter_cpu.h \ - $(BIN)/sample_filter_opengl.h - -# -# Rules to AOT-compile the halide filter for both CPU and OpenGL; the -# compiled filters depend on $(BIN)/sample_filter.generator, which in turn -# depends on the halide filter source in sample_filter.cpp -# -$(BIN)/sample_filter_cpu.o $(BIN)/sample_filter_cpu.h: $(BIN)/sample_filter.generator - LD_LIBRARY_PATH=../../bin $(BIN)/sample_filter.generator -g sample_filter -e object,c_header,stmt -o $(BIN) -f sample_filter_cpu target=$(HL_TARGET) - -$(BIN)/sample_filter_opengl.o $(BIN)/sample_filter_opengl.h: $(BIN)/sample_filter.generator - LD_LIBRARY_PATH=../../bin $(BIN)/sample_filter.generator -g sample_filter -e object,c_header,stmt -o $(BIN) -f sample_filter_opengl target=host-opengl-debug - -$(BIN)/sample_filter.generator: sample_filter_generator.cpp - @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -o $@ $^ $(HALIDE_TOOLS_DIR)/GenGen.cpp $(HALIDE_LIB_PATH) $(GENERATOR_LIBS) $(HALIDE_SYSTEM_LIBS) - -# -# Build in subdir using auto-dependency mechanism -# -$(BIN)/%.o: %.cpp - @mkdir -p $(@D) - $(CXX) -c $(CXXFLAGS) -I$(BIN) -MMD -MF $(patsubst %.o,%.d,$@) -o $@ $< - --include $(wildcard $(BIN)/*.d) diff --git a/apps/opengl_demo/README.md b/apps/opengl_demo/README.md deleted file mode 100644 index d2e7e8c91b88..000000000000 --- a/apps/opengl_demo/README.md +++ /dev/null @@ -1,104 +0,0 @@ -# Halide OpenGL Demo - -This demo contains an OpenGL desktop app that displays an input image side by -side with the result of running a sample halide filter in three different ways: - -1. On the CPU, not using OpenGL. - -2. In OpenGL, with Halide transfering the input data from the host and - transferring the result data back to the host. - -3. In OpenGL, with Halide accepting input data that's in an OpenGL texture, and - leaving the result in an OpenGL texture. - -The display reports the timing for each. 
You should expect to see that #3 is -fastest as it runs entirely on the GPU, while #2 is slowest because of the data -transfer times. - -In this example we use AOT compilation twice: Once with `target=host` to produce -the filter that runs on the CPU; and once with `target=host-opengl` to produce -the filter that runs in OpenGL (which we call twice). - -The sample filter inverts the RGB channels of the input image. - -_This demo is known to work on OS X 10.11 and Ubuntu Linux 14.04 & 16.04. -Windows has not yet been tested._ - -### Instructions: - -Build and run the app by simply running `make`. It should open a window showing -the input and the three (identical) filtering results. You can close the window -and exit by pressing ESCAPE. - -The `Makefile` has variables to specify where to find Halide, how to link -OpenGL, and so forth. You may need to tweak them for your platform. - -See the Makefile for details on how the filter gets AOT-compiled for CPU and -OpenGL. Note that the `Makefile` actually specifies `target=host-opengl-debug` -when AOT-compiling the opengl filter; that enables tracing of Halide's -management of its OpenGL pipeline. 
- -#### Dependencies: - -This app depends on: - -- [GLFW 3](http://www.glfw.org) -- [libpng](http://www.libpng.org) -- [libdrawtext](http://nuclear.mutantstargoat.com/sw/libdrawtext/) - -On OS X, all three can be installed using [homebrew](http://brew.sh) - -```sh -brew install glfw -brew install libpng -brew install libdrawtext -``` - -Halide itself can be installed on OS X via - -```sh -brew tap halide/halide -brew install halide -``` - -On Ubuntu Linux, everything but libdrawtext can be installed via system -packages: - -```sh -sudo apt-get install libglfw3-dev libx11-dev freeglut3-dev libfreetype6-dev libgl-dev libpng-dev -``` - -For libdrawtext, try this: - -``` -git clone https://github.com/jtsiomb/libdrawtext.git -cd libdrawtext -./configure -make -sudo make install -``` - -### Files: - -- `sample_filter.cpp` - - The Halide filter generator source. - -- `main.cpp` - - Contains all the Halide client code. - - Note that it `#include`s the generated files `build/sample_filter_cpu.h` and - `build/sample_filter_opengl.h`. - -- `layout.{h,cpp}` - - A minimal rendering framework for this example app. - -- `timer.{h,cpp}` - - A minimal timing & reporting library. - -- `{glfw,opengl,png}_helpers.{cpp,h}` - - Conveniences that hide the dirty details of the low-level packages. 
diff --git a/apps/opengl_demo/glfw_helpers.cpp b/apps/opengl_demo/glfw_helpers.cpp deleted file mode 100644 index 07752597e42d..000000000000 --- a/apps/opengl_demo/glfw_helpers.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include "glfw_helpers.h" -#include -#include -#include -#include - -using namespace GlfwHelpers; - -static GLFWwindow *window; - -static void die(const char *msg) { - fprintf(stderr, "%s\n", msg); - exit(EXIT_FAILURE); -} - -static void error_callback(int error, const char *description) { - die(description); -} - -static void key_callback(GLFWwindow *window, int key, int scancode, int action, int mods) { - if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) - glfwSetWindowShouldClose(window, GL_TRUE); -} - -static bool first_focus = false; -static void focus_callback(GLFWwindow *window, int) { - first_focus = true; -} - -struct info GlfwHelpers::setup(int width, int height) { - struct info info; - - glfwSetErrorCallback(error_callback); - if (!glfwInit()) die("couldn't init glfw!"); - glfwWindowHint(GLFW_DOUBLEBUFFER, GL_FALSE); // Single buffer mode, to avoid any doublebuffering timing issues - window = glfwCreateWindow(width, height, "opengl_halide_test", NULL, NULL); - if (!window) die("couldn't create window!"); - glfwSetKeyCallback(window, key_callback); - glfwSetWindowFocusCallback(window, focus_callback); - glfwMakeContextCurrent(window); - - while (!first_focus) { - glfwWaitEvents(); - } - - int framebuffer_width, framebuffer_height; - glfwGetFramebufferSize(window, &framebuffer_width, &framebuffer_height); - info.dpi_scale = float(framebuffer_width) / float(width); - - return info; -} - -void GlfwHelpers::terminate() { - while (!glfwWindowShouldClose(window)) { - glfwPollEvents(); - } - glfwDestroyWindow(window); - glfwTerminate(); -} - -void GlfwHelpers::set_opengl_context() { - glfwMakeContextCurrent(window); -} diff --git a/apps/opengl_demo/glfw_helpers.h b/apps/opengl_demo/glfw_helpers.h deleted file mode 100644 index 
cd3a0f05bdcf..000000000000 --- a/apps/opengl_demo/glfw_helpers.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _GLFW_HELPERS_H_ -#define _GLFW_HELPERS_H_ - -namespace GlfwHelpers { - -struct info { - float dpi_scale; -}; - -struct info setup(int width, int height); -void set_opengl_context(); -void terminate(); -} // namespace GlfwHelpers - -#endif diff --git a/apps/opengl_demo/image.png b/apps/opengl_demo/image.png deleted file mode 100644 index c73df2103613..000000000000 Binary files a/apps/opengl_demo/image.png and /dev/null differ diff --git a/apps/opengl_demo/layout.cpp b/apps/opengl_demo/layout.cpp deleted file mode 100644 index be313c068814..000000000000 --- a/apps/opengl_demo/layout.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include - -#include "opengl_helpers.h" - -#include "layout.h" - -using namespace Layout; - -static const int border_sz = 10; // pixels -static const int header_sz = 20; // pixels - -static struct info state; - -const struct info &Layout::setup(int image_width, int image_height) { - state.window_width = 2 * image_width + 3 * border_sz; - state.window_height = 2 * image_height + border_sz + 2 * header_sz; - return state; -} - -void Layout::draw_texture(enum location location, GLuint texture_id, int width, int height, const std::string &label) { - int x0, x1, y0, y1, lx, ly; - switch (location) { // set X coords - case LL: - case UL: - x0 = border_sz; - x1 = x0 + width; - lx = x0 + 2; - break; - case LR: - case UR: - x1 = state.window_width - border_sz; - x0 = x1 - width; - lx = x0 + 2; - break; - } - switch (location) { // set Y coords - case LL: - case LR: - y0 = header_sz; - y1 = y0 + height; - ly = 6; - break; - case UL: - case UR: - y1 = state.window_height - header_sz; - y0 = y1 - height; - ly = y1 + 6; - break; - } - - OpenGLHelpers::display_texture(texture_id, 2.0 * x0 / state.window_width - 1.0, 2.0 * x1 / state.window_width - 1.0, 2.0 * y0 / state.window_height - 1.0, 2.0 * y1 / state.window_height - 1.0); - OpenGLHelpers::draw_text(label, 
2.0 * lx / state.window_width - 1.0, 2.0 * ly / state.window_height - 1.0); -} - -void Layout::draw_image(enum location location, const uint8_t *data, int width, int height, const std::string &label) { - const auto texture_id = OpenGLHelpers::create_texture(width, height, data); - draw_texture(location, texture_id, width, height, label); - OpenGLHelpers::delete_texture(texture_id); -} diff --git a/apps/opengl_demo/layout.h b/apps/opengl_demo/layout.h deleted file mode 100644 index be4947abc122..000000000000 --- a/apps/opengl_demo/layout.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _LAYOUT_HELPERS_H_ -#define _LAYOUT_HELPERS_H_ - -#if defined(__APPLE__) -#include -#else -#include -#endif - -namespace Layout { - -enum location { UL, - UR, - LL, - LR }; - -struct info { - int window_width; - int window_height; -}; - -const struct info &setup(int image_width, int image_height); - -void draw_image(enum location location, const uint8_t *data, int width, int height, const std::string &label); -void draw_texture(enum location location, GLuint texture_id, int width, int height, const std::string &label); -} // namespace Layout - -#endif diff --git a/apps/opengl_demo/main.cpp b/apps/opengl_demo/main.cpp deleted file mode 100644 index 955f21ac812c..000000000000 --- a/apps/opengl_demo/main.cpp +++ /dev/null @@ -1,172 +0,0 @@ -#include -#include - -#include "glfw_helpers.h" -#include "layout.h" -#include "opengl_helpers.h" -#include "png_helpers.h" -#include "timer.h" - -#include "sample_filter_cpu.h" -#include "sample_filter_opengl.h" -#include -#include - -/* - * Initializes a halide_buffer_t object for 8-bit RGBA data stored - * interleaved as rgbargba... in row-major order. - */ -Halide::Runtime::Buffer create_buffer(uint8_t *data, int width, int height) { - return Halide::Runtime::Buffer::make_interleaved(data, width, height, 4); -} - -/* - * Runs the filter on the CPU. 
Takes a pointer to memory with the image - * data to filter, and a pointer to memory in which to place the result - * data. - */ -std::string run_cpu_filter(const uint8_t *image_data, uint8_t *result_data, int width, int height) { - const auto time = Timer::start("CPU"); - - // Create halide input buffer and point it at the passed image data - auto input_buf = create_buffer((uint8_t *)image_data, width, height); - - // Create halide output buffer and point it at the passed result data storage - auto output_buf = create_buffer(result_data, width, height); - - // Run the AOT-compiled OpenGL filter - sample_filter_cpu(input_buf, output_buf); - - return Timer::report(time); -} - -/* - * Runs the filter on OpenGL. Takes a pointer to memory with the image - * data to filter, and a pointer to memory in which to place the result - * data. - */ -std::string run_opengl_filter_from_host_to_host(const uint8_t *image_data, uint8_t *result_data, int width, int height) { - const auto time = Timer::start("OpenGL host-to-host"); - - // Create halide input buffer and point it at the passed image data for - // the host memory. Halide will automatically allocate a texture to - // hold the data on the GPU. Mark the host memory as "dirty" so halide - // will know it needs to transfer the data to the GPU texture. - auto input_buf = create_buffer((uint8_t *)image_data, width, height); - input_buf.set_host_dirty(); - - // Create halide output buffer and point it at the passed result data - // memory. Halide will automatically allocate a texture to hold the - // data on the GPU. - auto output_buf = create_buffer(result_data, width, height); - - // Run the AOT-compiled OpenGL filter - sample_filter_opengl(input_buf, output_buf); - - // Ensure that halide copies the data back to the host - output_buf.copy_to_host(); - - return Timer::report(time); -} - -/* - * Runs the filter on OpenGL. 
Assumes the data is already in a texture, - * and leaves the output in a texture - */ -std::string run_opengl_filter_from_texture_to_texture(GLuint input_texture_id, GLuint output_texture_id, int width, int height) { - const auto time = Timer::start("OpenGL texture-to-texture"); - - // Create halide input buffer and tell it to use the existing GPU - // texture. No need to allocate memory on the host since this simple - // pipeline will run entirely on the GPU. - auto input_buf = create_buffer(nullptr, width, height); - halide_opengl_wrap_texture(nullptr, input_buf.raw_buffer(), input_texture_id); - - // Create halide output buffer and tell it to use the existing GPU texture. - // No need to allocate memory on the host since this simple pipeline will run - // entirely on the GPU. - auto output_buf = create_buffer(nullptr, width, height); - halide_opengl_wrap_texture(nullptr, output_buf.raw_buffer(), output_texture_id); - - // Run the AOT-compiled OpenGL filter - sample_filter_opengl(input_buf, output_buf); - - // Tell halide we are finished using the textures - halide_opengl_detach_texture(nullptr, output_buf.raw_buffer()); - halide_opengl_detach_texture(nullptr, input_buf.raw_buffer()); - - return Timer::report(time); -} - -int main(const int argc, const char *argv[]) { - if (argc != 2) { - std::cerr << "Usage: " << argv[0] << " filename\n"; - exit(1); - } - const std::string filename = argv[1]; - - const auto image = PNGHelpers::load(filename); - const auto width = image.width; - const auto height = image.height; - - const auto layout = Layout::setup(width, height); - const auto glfw = GlfwHelpers::setup(layout.window_width, layout.window_height); - OpenGLHelpers::setup(glfw.dpi_scale); - - /* - * Draw the original image - */ - Layout::draw_image(Layout::UL, image.data, width, height, "Input"); - - std::string report; - - /* - * Draw the result of running the filter on the CPU - */ - const auto cpu_result_data = (uint8_t *)calloc(width * height * 4, 
sizeof(uint8_t)); - report = run_cpu_filter(image.data, cpu_result_data, width, height); - Layout::draw_image(Layout::UR, cpu_result_data, width, height, report); - free((void *)cpu_result_data); - - /* - * Draw the result of running the filter on OpenGL, with data starting - * from and ending up on the host - */ - const auto opengl_result_data = (uint8_t *)calloc(width * height * 4, sizeof(uint8_t)); - report = run_opengl_filter_from_host_to_host(image.data, opengl_result_data, width, height); - Layout::draw_image(Layout::LL, opengl_result_data, width, height, report); - free((void *)opengl_result_data); - - /* - * Draw the result of running the filter on OpenGL, with data starting - * from and ending up in a texture on the device - */ - const auto image_texture_id = OpenGLHelpers::create_texture(width, height, image.data); - const auto result_texture_id = OpenGLHelpers::create_texture(width, height, nullptr); - report = run_opengl_filter_from_texture_to_texture(image_texture_id, result_texture_id, width, height); - Layout::draw_texture(Layout::LR, result_texture_id, width, height, report); - OpenGLHelpers::delete_texture(image_texture_id); - OpenGLHelpers::delete_texture(result_texture_id); - - // Release all Halide internal structures for the OpenGL context - halide_opengl_context_lost(nullptr); - - GlfwHelpers::terminate(); - - free((void *)image.data); - - return 0; -} - -/* - * Global definition required by halide with OpenGL backend, to prevent - * Halide from allocating its own OpenGL context. - * - * In general, this function needs to set an active OpenGL context - * and return 0 on success. 
- */ - -int halide_opengl_create_context(void * /*user_context*/) { - GlfwHelpers::set_opengl_context(); - return 0; -} diff --git a/apps/opengl_demo/opengl_helpers.cpp b/apps/opengl_demo/opengl_helpers.cpp deleted file mode 100644 index 1cf994f19879..000000000000 --- a/apps/opengl_demo/opengl_helpers.cpp +++ /dev/null @@ -1,67 +0,0 @@ -#include - -#include "opengl_helpers.h" - -using namespace OpenGLHelpers; - -static const int font_size = 12; - -void OpenGLHelpers::setup(float dpi_scale) { - const int scaled_font_size = font_size * dpi_scale; - dtx_use_font(dtx_open_font(DTX_FONT, scaled_font_size), scaled_font_size); - glClear(GL_COLOR_BUFFER_BIT); -} - -GLuint OpenGLHelpers::create_texture(int width, int height, const uint8_t *data) { - GLuint texture_id; - glEnable(GL_TEXTURE_2D); - glGenTextures(1, &texture_id); - glBindTexture(GL_TEXTURE_2D, texture_id); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, data); - return texture_id; -} - -void OpenGLHelpers::delete_texture(GLuint texture_id) { - glDeleteTextures(1, &texture_id); -} - -void OpenGLHelpers::display_texture(GLuint texture_id, float x0, float x1, float y0, float y1) { - glBindTexture(GL_TEXTURE_2D, texture_id); - glMatrixMode(GL_PROJECTION); - glLoadIdentity(); - glMatrixMode(GL_MODELVIEW); - glLoadIdentity(); - glMatrixMode(GL_TEXTURE); - glLoadIdentity(); - glColor3f(1, 1, 1); - glBegin(GL_QUADS); - glTexCoord2d(1, 0); - glVertex2f(x1, y1); - glTexCoord2d(0, 0); - glVertex2f(x0, y1); - glTexCoord2d(0, 1); - glVertex2f(x0, y0); - glTexCoord2d(1, 1); - glVertex2f(x1, y0); - glEnd(); - glFinish(); -} - -void OpenGLHelpers::draw_text(const std::string &text, float x, float y) { - 
glMatrixMode(GL_PROJECTION); - glLoadIdentity(); - glOrtho(-1, 1, -1, 1, -1, 1); - glMatrixMode(GL_MODELVIEW); - glLoadIdentity(); - glTranslatef(x, y, 0); - glColor3f(1, 1, 1); - GLint viewport[4]; - glGetIntegerv(GL_VIEWPORT, viewport); - glScalef(2.0f / viewport[2], 2.0f / viewport[3], 1); - dtx_string(text.c_str()); - glFinish(); -} diff --git a/apps/opengl_demo/opengl_helpers.h b/apps/opengl_demo/opengl_helpers.h deleted file mode 100644 index 962f61989928..000000000000 --- a/apps/opengl_demo/opengl_helpers.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _OPENGL_HELPERS_H_ -#define _OPENGL_HELPERS_H_ - -#include - -#if defined(__APPLE__) -#include -#else -#include -#endif - -namespace OpenGLHelpers { -void setup(float dpi_scale); -GLuint create_texture(int width, int height, const uint8_t *data); -void delete_texture(GLuint texture_id); -void display_texture(GLuint texture_id, float x0, float x1, float y0, float y1); -void draw_text(const std::string &text, float x, float y); -} // namespace OpenGLHelpers - -#endif diff --git a/apps/opengl_demo/png_helpers.cpp b/apps/opengl_demo/png_helpers.cpp deleted file mode 100644 index c7d48f00949f..000000000000 --- a/apps/opengl_demo/png_helpers.cpp +++ /dev/null @@ -1,73 +0,0 @@ -#include -#include -#include - -#include "png_helpers.h" - -using namespace PNGHelpers; - -struct image_info PNGHelpers::load(const std::string &filepath) { - const auto fp = fopen(filepath.c_str(), "rb"); - if (fp == 0) { - perror(filepath.c_str()); - exit(1); - } - - // verify the header - png_byte header[8]; - fread(header, 1, 8, fp); - if (png_sig_cmp(header, 0, 8)) { - std::cerr << "error: " << filepath << " is not a PNG file.\n"; - exit(1); - } - - auto png = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); - auto png_info = png_create_info_struct(png); - - if (setjmp(png_jmpbuf(png))) abort(); - - png_init_io(png, fp); - png_set_sig_bytes(png, 8); // already read header - png_read_info(png, png_info); - - const auto width = 
png_get_image_width(png, png_info); - const auto height = png_get_image_height(png, png_info); - const auto color_type = png_get_color_type(png, png_info); - const auto bit_depth = png_get_bit_depth(png, png_info); - - if (bit_depth == 16) - png_set_strip_16(png); - - if (color_type == PNG_COLOR_TYPE_PALETTE) - png_set_palette_to_rgb(png); - - if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) - png_set_expand_gray_1_2_4_to_8(png); - - if (png_get_valid(png, png_info, PNG_INFO_tRNS)) - png_set_tRNS_to_alpha(png); - - if (color_type == PNG_COLOR_TYPE_RGB || color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_PALETTE) - png_set_filler(png, 0xFF, PNG_FILLER_AFTER); - - if (color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_GRAY_ALPHA) - png_set_gray_to_rgb(png); - - png_read_update_info(png, png_info); - - const auto rowbytes = png_get_rowbytes(png, png_info); - const auto image_data = (png_byte *)malloc(rowbytes * height * sizeof(png_byte)); - - const auto row_pointers = (png_byte **)malloc(height * sizeof(png_byte *)); - for (int i = 0; i < height; i++) { - row_pointers[i] = image_data + i * rowbytes; - } - - png_read_image(png, row_pointers); - - png_destroy_read_struct(&png, &png_info, nullptr); - free(row_pointers); - fclose(fp); - - return {width, height, image_data}; -} diff --git a/apps/opengl_demo/png_helpers.h b/apps/opengl_demo/png_helpers.h deleted file mode 100644 index da1791526e00..000000000000 --- a/apps/opengl_demo/png_helpers.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _PNG_HELPERS_ -#define _PNG_HELPERS_ - -namespace PNGHelpers { - -struct image_info { - unsigned int width; - unsigned int height; - const uint8_t *data; -}; - -struct image_info load(const std::string &filepath); -} // namespace PNGHelpers - -#endif diff --git a/apps/opengl_demo/sample_filter_generator.cpp b/apps/opengl_demo/sample_filter_generator.cpp deleted file mode 100644 index 4bf30eaf641b..000000000000 --- 
a/apps/opengl_demo/sample_filter_generator.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include "Halide.h" - -class SampleFilter : public Halide::Generator { -public: - Input> input{"input", 3}; - Output> output{"output", 3}; - - void generate() { - Var x, y, c; - - output(x, y, c) = select(c == 3, input(x, y, c), cast(255.0f - input(x, y, c))); - - input.dim(0).set_stride(4).dim(2).set_stride(1).set_bounds(0, 4); - - output.dim(0).set_stride(4).dim(2).set_stride(1); - output.bound(c, 0, 4); - - if (get_target().has_feature(Target::OpenGL)) { - output.glsl(x, y, c); - } - } -}; - -HALIDE_REGISTER_GENERATOR(SampleFilter, sample_filter) diff --git a/apps/opengl_demo/timer.cpp b/apps/opengl_demo/timer.cpp deleted file mode 100644 index 2cd243a323ab..000000000000 --- a/apps/opengl_demo/timer.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include -#include - -#include "timer.h" - -using namespace Timer; - -struct info Timer::start(const std::string &what) { - struct info info { - what - }; - std::cerr << "\n-------------- Starting " << info.what << "\n"; - info.time = std::chrono::high_resolution_clock::now(); - return info; -} - -std::string Timer::report(const struct info &info) { - const auto end_time = std::chrono::high_resolution_clock::now(); - const auto ms = std::chrono::duration(end_time - info.time).count(); - std::stringstream report; - report << info.what << ": " << ms << "ms"; - std::cerr << "-------------- Finished " << report.str() << "\n"; - return report.str(); -} diff --git a/apps/opengl_demo/timer.h b/apps/opengl_demo/timer.h deleted file mode 100644 index 596e5c78fe55..000000000000 --- a/apps/opengl_demo/timer.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _TIMER_H_ -#define _TIMER_H_ - -#include - -namespace Timer { -struct info { - const std::string what; - std::chrono::time_point time; -}; - -struct info start(const std::string &what); -std::string report(const struct info &); -} // namespace Timer - -#endif diff --git a/apps/resnet_50/Makefile b/apps/resnet_50/Makefile 
index 188620382940..3d1dd30c9ce8 100644 --- a/apps/resnet_50/Makefile +++ b/apps/resnet_50/Makefile @@ -13,7 +13,7 @@ $(BIN)/%/pytorch_weights/ok: $(GENERATOR_BIN)/resnet50.generator: Resnet50Generator.cpp $(GENERATOR_DEPS) @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -g -fno-rtti $(filter %.cpp,$^) -o $@ $(LIBHALIDE_LDFLAGS) + $(CXX) $(CXXFLAGS) -g -fno-rtti $(filter %.cpp,$^) -o $@ $(LIBHALIDE_LDFLAGS) $(BIN)/%/resnet50.a: $(GENERATOR_BIN)/resnet50.generator @mkdir -p $(@D) @@ -21,7 +21,7 @@ $(BIN)/%/resnet50.a: $(GENERATOR_BIN)/resnet50.generator $(BIN)/%/process: process.cpp $(BIN)/%/resnet50.a @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS) + $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) benchmark_and_validate: $(BIN)/$(HL_TARGET)/process $(BIN)/$(HL_TARGET)/pytorch_weights/ok $< 10 $* $(BIN)/$(HL_TARGET)/pytorch_weights/ $(SEED) $(BIN)/$(HL_TARGET)/res50gen_output.bin diff --git a/apps/stencil_chain/Makefile b/apps/stencil_chain/Makefile index 91750f988869..116922d03095 100644 --- a/apps/stencil_chain/Makefile +++ b/apps/stencil_chain/Makefile @@ -6,7 +6,7 @@ build: $(BIN)/$(HL_TARGET)/process $(GENERATOR_BIN)/stencil_chain.generator: stencil_chain_generator.cpp $(GENERATOR_DEPS) @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) $(filter %.cpp,$^) -o $@ $(LIBHALIDE_LDFLAGS) + $(CXX) $(CXXFLAGS) $(filter %.cpp,$^) -o $@ $(LIBHALIDE_LDFLAGS) $(BIN)/%/stencil_chain.a: $(GENERATOR_BIN)/stencil_chain.generator @mkdir -p $(@D) @@ -18,7 +18,7 @@ $(BIN)/%/stencil_chain_auto_schedule.a: $(GENERATOR_BIN)/stencil_chain.generator $(BIN)/%/process: process.cpp $(BIN)/%/stencil_chain.a $(BIN)/%/stencil_chain_auto_schedule.a @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS) + $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall $^ -o $@ $(LDFLAGS) 
$(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(BIN)/%/out.png: $(BIN)/%/process @mkdir -p $(@D) diff --git a/apps/support/Makefile.inc b/apps/support/Makefile.inc index c72bf1c36735..8f647457dd9f 100644 --- a/apps/support/Makefile.inc +++ b/apps/support/Makefile.inc @@ -98,7 +98,6 @@ ANDROID_API_VERSION ?= 26 CXX-host ?= $(CXX) CXX-host-opencl ?= $(CXX) -CXX-host-opengl ?= $(CXX) CXX-host-cuda ?= $(CXX) CXX-host-metal ?= $(CXX) CXX-host-hvx_128 ?= $(CXX) @@ -111,7 +110,6 @@ CXX-arm-32-profile-android ?= $(CXX-arm-32-android) CXXFLAGS-host ?= $(CXXFLAGS) CXXFLAGS-host-opencl ?= $(CXXFLAGS) -CXXFLAGS-host-opengl ?= $(CXXFLAGS) CXXFLAGS-host-cuda ?= $(CXXFLAGS) CXXFLAGS-host-metal ?= $(CXXFLAGS) CXXFLAGS-host-hvx_128 ?= $(CXXFLAGS) @@ -121,7 +119,6 @@ CXXFLAGS-arm-32-android ?= $(CXXFLAGS) LDFLAGS-host ?= $(LDFLAGS) LDFLAGS-host-opencl ?= $(LDFLAGS) -LDFLAGS-host-opengl ?= $(LDFLAGS) LDFLAGS-host-cuda ?= $(LDFLAGS) LDFLAGS-host-metal ?= $(LDFLAGS) LDFLAGS-host-hvx_128 ?= $(LDFLAGS) @@ -185,15 +182,6 @@ IMAGE_IO_CXX_FLAGS = $(LIBPNG_CXX_FLAGS) $(LIBJPEG_CXX_FLAGS) IMAGE_IO_FLAGS = $(IMAGE_IO_LIBS) $(IMAGE_IO_CXX_FLAGS) -PLATFORM_OPENGL_LDFLAGS=-lGL -lX11 -ifeq ($(UNAME), Darwin) -PLATFORM_OPENGL_LDFLAGS=-framework OpenGL -endif - -ifneq (, $(findstring opengl,$(HL_TARGET))) - OPENGL_LDFLAGS=$(PLATFORM_OPENGL_LDFLAGS) -endif - ifneq (, $(findstring metal,$(HL_TARGET))) LDFLAGS += -framework Metal -framework Foundation endif diff --git a/apps/unsharp/Makefile b/apps/unsharp/Makefile index 1accb3c498ea..fa912ad172e1 100644 --- a/apps/unsharp/Makefile +++ b/apps/unsharp/Makefile @@ -22,7 +22,7 @@ $(BIN)/%/runtime.a: $(GENERATOR_BIN)/unsharp.generator $(BIN)/%/filter: filter.cpp $(BIN)/%/unsharp.a $(BIN)/%/unsharp_auto_schedule.a $(BIN)/%/runtime.a @mkdir -p $(@D) - $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) $(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(OPENGL_LDFLAGS) + $(CXX) $(CXXFLAGS) -I$(BIN)/$* -Wall -O3 $^ -o $@ $(LDFLAGS) 
$(IMAGE_IO_FLAGS) $(CUDA_LDFLAGS) $(OPENCL_LDFLAGS) $(BIN)/%/out.png: $(BIN)/%/filter $< ../images/rgba.png $(BIN)/$*/out.png diff --git a/cmake/HalideGeneratorHelpers.cmake b/cmake/HalideGeneratorHelpers.cmake index 220f1f56ceb8..d48e02778970 100644 --- a/cmake/HalideGeneratorHelpers.cmake +++ b/cmake/HalideGeneratorHelpers.cmake @@ -342,7 +342,8 @@ function(_Halide_add_targets_to_runtime TARGET) endfunction() function(_Halide_target_link_gpu_libs TARGET VISIBILITY) - if ("${ARGN}" MATCHES "opengl") + # TODO(https://github.com/halide/Halide/issues/5633): verify that this is correct & necessary for OpenGLCompute + if ("${ARGN}" MATCHES "openglcompute") if ("${ARGN}" MATCHES "egl") find_package(OpenGL REQUIRED COMPONENTS OpenGL EGL) target_link_libraries(${TARGET} ${VISIBILITY} OpenGL::OpenGL OpenGL::EGL) diff --git a/dependencies/CMakeLists.txt b/dependencies/CMakeLists.txt index 3fc8ba95ce6e..002afd0bcd7d 100644 --- a/dependencies/CMakeLists.txt +++ b/dependencies/CMakeLists.txt @@ -6,6 +6,7 @@ set(THREADS_PREFER_PTHREAD_FLAG YES) find_package(Threads REQUIRED) set_target_properties(Threads::Threads PROPERTIES IMPORTED_GLOBAL TRUE) +# TODO(https://github.com/halide/Halide/issues/5633): verify this is still correct / necessary for OpenGLCompute find_package(OpenGL) if (TARGET OpenGL::GL) set_target_properties(OpenGL::GL PROPERTIES IMPORTED_GLOBAL TRUE) @@ -18,7 +19,7 @@ endif () ## # Third-party dependencies in their own subdirectories -## +## add_subdirectory(llvm) diff --git a/python_bindings/correctness/target.py b/python_bindings/correctness/target.py index b54fb2984969..3f8e8347b23b 100644 --- a/python_bindings/correctness/target.py +++ b/python_bindings/correctness/target.py @@ -46,10 +46,10 @@ def test_target(): # Full specification round-trip, crazy features t1 = hl.Target(hl.TargetOS.Android, hl.TargetArch.ARM, 32, [hl.TargetFeature.JIT, hl.TargetFeature.SSE41, hl.TargetFeature.AVX, hl.TargetFeature.AVX2, - hl.TargetFeature.CUDA, hl.TargetFeature.OpenCL, 
hl.TargetFeature.OpenGL, hl.TargetFeature.OpenGLCompute, + hl.TargetFeature.CUDA, hl.TargetFeature.OpenCL, hl.TargetFeature.OpenGLCompute, hl.TargetFeature.Debug]) ts = t1.to_string() - assert ts == "arm-32-android-avx-avx2-cuda-debug-jit-opencl-opengl-openglcompute-sse41" + assert ts == "arm-32-android-avx-avx2-cuda-debug-jit-opencl-openglcompute-sse41" assert hl.Target.validate_target_string(ts) # Expected failures: diff --git a/python_bindings/src/PyEnums.cpp b/python_bindings/src/PyEnums.cpp index c64352f73101..b47cd3e761a9 100644 --- a/python_bindings/src/PyEnums.cpp +++ b/python_bindings/src/PyEnums.cpp @@ -15,7 +15,6 @@ void define_enums(py::module &m) { .value("Default_GPU", DeviceAPI::Default_GPU) .value("CUDA", DeviceAPI::CUDA) .value("OpenCL", DeviceAPI::OpenCL) - .value("GLSL", DeviceAPI::GLSL) .value("OpenGLCompute", DeviceAPI::OpenGLCompute) .value("Metal", DeviceAPI::Metal) .value("Hexagon", DeviceAPI::Hexagon); @@ -106,7 +105,6 @@ void define_enums(py::module &m) { .value("CLDoubles", Target::Feature::CLDoubles) .value("CLHalf", Target::Feature::CLHalf) .value("CLAtomics64", Target::Feature::CLAtomics64) - .value("OpenGL", Target::Feature::OpenGL) .value("OpenGLCompute", Target::Feature::OpenGLCompute) .value("EGL", Target::Feature::EGL) .value("UserContext", Target::Feature::UserContext) diff --git a/python_bindings/src/PyFunc.cpp b/python_bindings/src/PyFunc.cpp index 4cf8eb6736e2..2bb24193a962 100644 --- a/python_bindings/src/PyFunc.cpp +++ b/python_bindings/src/PyFunc.cpp @@ -342,10 +342,6 @@ void define_func(py::module &m) { .def("bound_extent", &Func::bound_extent, py::arg("var"), py::arg("extent")) - .def("shader", &Func::shader, py::arg("x"), py::arg("y"), py::arg("c"), py::arg("device_api")) - - .def("glsl", &Func::glsl, py::arg("x"), py::arg("y"), py::arg("c")) - .def("align_storage", &Func::align_storage, py::arg("dim"), py::arg("alignment")) .def("fold_storage", &Func::fold_storage, py::arg("dim"), py::arg("extent"), 
py::arg("fold_forward") = true) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 29458c7db0d9..0b45adf43715 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -81,7 +81,6 @@ set(HEADER_FILES ImageParam.h InferArguments.h InjectHostDevBufferCopies.h - InjectOpenGLIntrinsics.h Inline.h InlineReductions.h IntegerDivisionTable.h @@ -163,7 +162,6 @@ set(HEADER_FILES UnsafePromises.h Util.h Var.h - VaryingAttributes.h VectorizeLoops.h WasmExecutor.h WrapCalls.h @@ -241,7 +239,6 @@ set(SOURCE_FILES ImageParam.cpp InferArguments.cpp InjectHostDevBufferCopies.cpp - InjectOpenGLIntrinsics.cpp Inline.cpp InlineReductions.cpp IntegerDivisionTable.cpp @@ -335,7 +332,6 @@ set(SOURCE_FILES UnsafePromises.cpp Util.cpp Var.cpp - VaryingAttributes.cpp VectorizeLoops.cpp WasmExecutor.cpp WrapCalls.cpp @@ -454,11 +450,6 @@ if (TARGET_OPENCL) target_compile_definitions(Halide PRIVATE WITH_OPENCL) endif () -option(TARGET_OPENGL "Include OpenGL/GLSL target" ON) -if (TARGET_OPENGL) - target_compile_definitions(Halide PRIVATE WITH_OPENGL) -endif () - option(TARGET_METAL "Include Metal target" ON) if (TARGET_METAL) target_compile_definitions(Halide PRIVATE WITH_METAL) diff --git a/src/CodeGen_C.cpp b/src/CodeGen_C.cpp index 9aac7010eee3..72cc3ef29969 100644 --- a/src/CodeGen_C.cpp +++ b/src/CodeGen_C.cpp @@ -29,7 +29,6 @@ extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeHexagonHost extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeMetal_h[]; extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeOpenCL_h[]; extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeOpenGLCompute_h[]; -extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeOpenGL_h[]; extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeQurt_h[]; extern "C" unsigned char halide_internal_runtime_header_HalideRuntimeD3D12Compute_h[]; @@ -418,9 +417,6 @@ CodeGen_C::~CodeGen_C() { if 
(target.has_feature(Target::OpenGLCompute)) { stream << halide_internal_runtime_header_HalideRuntimeOpenGLCompute_h << "\n"; } - if (target.has_feature(Target::OpenGL)) { - stream << halide_internal_runtime_header_HalideRuntimeOpenGL_h << "\n"; - } if (target.has_feature(Target::D3D12Compute)) { stream << halide_internal_runtime_header_HalideRuntimeD3D12Compute_h << "\n"; } diff --git a/src/CodeGen_GPU_Host.cpp b/src/CodeGen_GPU_Host.cpp index 488a8703bfdc..7f3d3a46fb68 100644 --- a/src/CodeGen_GPU_Host.cpp +++ b/src/CodeGen_GPU_Host.cpp @@ -22,7 +22,6 @@ #include "LLVM_Headers.h" #include "Simplify.h" #include "Util.h" -#include "VaryingAttributes.h" namespace Halide { namespace Internal { @@ -104,13 +103,9 @@ template CodeGen_GPU_Host::CodeGen_GPU_Host(const Target &target) : CodeGen_CPU(target) { // For the default GPU, the order of preferences is: Metal, - // OpenCL, CUDA, OpenGLCompute, and OpenGL last. + // OpenCL, CUDA, OpenGLCompute last. // The code is in reverse order to allow later tests to override // earlier ones. - if (target.has_feature(Target::OpenGL)) { - debug(1) << "Constructing OpenGL device codegen\n"; - cgdev[DeviceAPI::GLSL] = std::make_unique(target); - } if (target.has_feature(Target::OpenGLCompute)) { debug(1) << "Constructing OpenGL Compute device codegen\n"; cgdev[DeviceAPI::OpenGLCompute] = new_CodeGen_OpenGLCompute_Dev(target); @@ -255,77 +250,31 @@ void CodeGen_GPU_Host::visit(const For *loop) { Value *gpu_num_coords_dim0 = zero_int32; Value *gpu_num_coords_dim1 = zero_int32; - if (loop->device_api == DeviceAPI::GLSL) { - - // GL draw calls that invoke the GLSL shader are issued for pairs of - // for-loops over spatial x and y dimensions. 
For each for-loop we create - // one scalar vertex attribute for the spatial dimension corresponding to - // that loop, plus one scalar attribute for each expression previously - // labeled as "glsl_varying" - - // Pass variables created during setup_gpu_vertex_buffer to the - // dev run function call. - gpu_num_padded_attributes = codegen(Variable::make(Int(32), "glsl.num_padded_attributes")); - gpu_num_coords_dim0 = codegen(Variable::make(Int(32), "glsl.num_coords_dim0")); - gpu_num_coords_dim1 = codegen(Variable::make(Int(32), "glsl.num_coords_dim1")); - - // Look up the allocation for the vertex buffer and cast it to the - // right type - gpu_vertex_buffer = codegen(Variable::make(type_of(), "glsl.vertex_buffer")); - gpu_vertex_buffer = builder->CreatePointerCast(gpu_vertex_buffer, - CodeGen_LLVM::f32_t->getPointerTo()); - } - // compute a closure over the state passed into the kernel HostClosure c(loop->body, loop->name); // Determine the arguments that must be passed into the halide function vector closure_args = c.arguments(); - // Halide allows passing of scalar float and integer arguments. For - // OpenGL, pack these into vec4 uniforms and varying attributes - if (loop->device_api == DeviceAPI::GLSL) { - - int num_uniform_floats = 0; - - // The spatial x and y coordinates are passed in the first two - // scalar float varying slots - int num_varying_floats = 2; - int num_uniform_ints = 0; - - // Pack scalar parameters into vec4 - for (size_t i = 0; i < closure_args.size(); i++) { - if (closure_args[i].is_buffer) { - continue; - } else if (ends_with(closure_args[i].name, ".varying")) { - closure_args[i].packed_index = num_varying_floats++; - } else if (closure_args[i].type.is_float()) { - closure_args[i].packed_index = num_uniform_floats++; - } else if (closure_args[i].type.is_int()) { - closure_args[i].packed_index = num_uniform_ints++; - } - } - } else { - // Sort the args by the size of the underlying type. 
This is - // helpful for avoiding struct-packing ambiguities in metal, - // which passes the scalar args as a struct. - std::sort(closure_args.begin(), closure_args.end(), - [](const DeviceArgument &a, const DeviceArgument &b) { - if (a.is_buffer == b.is_buffer) { - return a.type.bits() > b.type.bits(); - } else { - // Ensure that buffer arguments come first: - // for many OpenGL/Compute systems, the - // legal indices for buffer args are much - // more restrictive than for scalar args, - // and scalar args can be 'grown' by - // LICM. Putting buffers first makes it much - // more likely we won't fail on some - // hardware. - return a.is_buffer > b.is_buffer; - } - }); - } + // Sort the args by the size of the underlying type. This is + // helpful for avoiding struct-packing ambiguities in metal, + // which passes the scalar args as a struct. + std::sort(closure_args.begin(), closure_args.end(), + [](const DeviceArgument &a, const DeviceArgument &b) { + if (a.is_buffer == b.is_buffer) { + return a.type.bits() > b.type.bits(); + } else { + // Ensure that buffer arguments come first: + // for many OpenGL/Compute systems, the + // legal indices for buffer args are much + // more restrictive than for scalar args, + // and scalar args can be 'grown' by + // LICM. Putting buffers first makes it much + // more likely we won't fail on some + // hardware. 
+ return a.is_buffer > b.is_buffer; + } + }); for (size_t i = 0; i < closure_args.size(); i++) { if (closure_args[i].is_buffer && allocations.contains(closure_args[i].name)) { diff --git a/src/CodeGen_Internal.cpp b/src/CodeGen_Internal.cpp index f118ec9b9f2e..86a1baf60bb0 100644 --- a/src/CodeGen_Internal.cpp +++ b/src/CodeGen_Internal.cpp @@ -222,7 +222,6 @@ bool function_takes_user_context(const std::string &name) { "halide_memoization_cache_release", "halide_cuda_run", "halide_opencl_run", - "halide_opengl_run", "halide_openglcompute_run", "halide_metal_run", "halide_d3d12compute_run", @@ -246,7 +245,6 @@ bool function_takes_user_context(const std::string &name) { "halide_vtcm_free", "halide_cuda_initialize_kernels", "halide_opencl_initialize_kernels", - "halide_opengl_initialize_kernels", "halide_openglcompute_initialize_kernels", "halide_metal_initialize_kernels", "halide_d3d12compute_initialize_kernels", diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp index ae1bf51bf4fd..e5d86abc753a 100644 --- a/src/CodeGen_LLVM.cpp +++ b/src/CodeGen_LLVM.cpp @@ -240,7 +240,6 @@ std::unique_ptr CodeGen_LLVM::new_for_target(const Target &target, // The awkward mapping from targets to code generators if (target.features_any_of({Target::CUDA, Target::OpenCL, - Target::OpenGL, Target::OpenGLCompute, Target::Metal, Target::D3D12Compute})) { diff --git a/src/CodeGen_OpenGLCompute_Dev.cpp b/src/CodeGen_OpenGLCompute_Dev.cpp index 1bc8c7d46cdb..1be9045bb1d1 100644 --- a/src/CodeGen_OpenGLCompute_Dev.cpp +++ b/src/CodeGen_OpenGLCompute_Dev.cpp @@ -7,7 +7,6 @@ #include "IRMutator.h" #include "IROperator.h" #include "Simplify.h" -#include "VaryingAttributes.h" #include #include #include diff --git a/src/CodeGen_OpenGL_Dev.cpp b/src/CodeGen_OpenGL_Dev.cpp index 333d837eb64b..2081fd37b75c 100644 --- a/src/CodeGen_OpenGL_Dev.cpp +++ b/src/CodeGen_OpenGL_Dev.cpp @@ -6,7 +6,6 @@ #include "IRMutator.h" #include "IROperator.h" #include "Simplify.h" -#include "VaryingAttributes.h" 
#include #include #include @@ -759,127 +758,6 @@ void CodeGen_GLSL::visit(const Evaluate *op) { print_expr(op->value); } -void CodeGen_GLSL::visit(const Call *op) { - ostringstream rhs; - if (op->is_intrinsic(Call::glsl_texture_load)) { - // This intrinsic takes five arguments - // glsl_texture_load(, , , , ) - internal_assert(op->args.size() == 5); - - // The argument to the call is either a StringImm or a broadcasted - // StringImm if this is part of a vectorized expression - internal_assert(op->args[0].as() || - (op->args[0].as() && op->args[0].as()->value.as())); - - const StringImm *string_imm = op->args[0].as(); - if (!string_imm) { - string_imm = op->args[0].as()->value.as(); - } - - // Determine the halide buffer associated with this load - string buffername = string_imm->value; - - internal_assert((op->type.code() == Type::UInt || op->type.code() == Type::Float) && - (op->type.lanes() >= 1 && op->type.lanes() <= 4)); - - if (op->type.is_uint()) { - rhs << print_type(op->type) << "(floor("; - } - - if (op->type.is_vector()) { - // The channel argument must be a ramp or a broadcast of a constant. - Expr c = op->args[4]; - internal_assert(is_const(c)); - - const Ramp *rc = c.as(); - const Broadcast *bx = op->args[2].as(); - const Broadcast *by = op->args[3].as(); - if (rc && is_const_zero(rc->base) && is_const_one(rc->stride) && bx && by) { - // If the x and y coordinates are broadcasts, and the c - // coordinate is a dense ramp, we can do a single - // texture2D call. - rhs << "texture2D(" << print_name(buffername) << ", vec2(" - << print_expr(bx->value) << ", " - << print_expr(by->value) << "))"; - - // texture2D always returns a vec4. Swizzle out the lanes we want. 
- switch (op->type.lanes()) { - case 1: - rhs << ".r"; - break; - case 2: - rhs << ".rg"; - break; - case 3: - rhs << ".rgb"; - break; - default: - break; - } - } else { - // Otherwise do one load per lane and make a vector - vector xs = print_lanes(op->args[2]); - vector ys = print_lanes(op->args[3]); - vector cs = print_lanes(op->args[4]); - string name = print_name(buffername); - - string x = print_expr(op->args[2]), y = print_expr(op->args[3]); - rhs << print_type(op->type) << "("; - for (int i = 0; i < op->type.lanes(); i++) { - if (i > 0) { - rhs << ", "; - } - rhs << "texture2D(" << name << ", vec2(" - << xs[i] << ", " << ys[i] << "))[" << cs[i] << "]"; - } - rhs << ")"; - } - } else if (const int64_t *ic = as_const_int(op->args[4])) { - internal_assert(*ic >= 0 && *ic < 4); - rhs << "texture2D(" << print_name(buffername) << ", vec2(" - << print_expr(op->args[2]) << ", " - << print_expr(op->args[3]) << "))." - << get_lane_suffix(*ic); - } else { - rhs << "texture2D(" << print_name(buffername) << ", vec2(" - << print_expr(op->args[2]) << ", " - << print_expr(op->args[3]) << "))[" - << print_expr(op->args[4]) << "]"; - } - - if (op->type.is_uint()) { - rhs << " * " << print_expr(cast(op->type.max())) << " + 0.5))"; - } - - } else if (op->is_intrinsic(Call::glsl_texture_store)) { - internal_assert(op->args.size() == 6); - string sval = print_expr(op->args[5]); - string suffix = get_vector_suffix(op->args[4]); - stream << get_indent() << "gl_FragColor" << suffix - << " = " << sval; - if (op->args[5].type().is_uint()) { - stream << " / " << print_expr(cast(op->args[5].type().max())); - } - stream << ";\n"; - // glsl_texture_store is called only for its side effect; there is - // no return value. - id = ""; - return; - } else if (op->is_intrinsic(Call::glsl_varying)) { - // Varying attributes should be substituted out by this point in - // codegen. - debug(2) << "Found skipped varying attribute: " << op->args[0] << "\n"; - - // Output the tagged expression. 
- print_expr(op->args[1]); - return; - } else { - CodeGen_GLSLBase::visit(op); - return; - } - print_assignment(op->type, rhs.str()); -} - namespace { class AllAccessConstant : public IRVisitor { using IRVisitor::visit; @@ -1256,16 +1134,6 @@ void CodeGen_GLSL::test() { Broadcast::make(2.f, 4)), "vec4 $ = vec4(2.0, 1.0, 2.0, 2.0);\n"); - // Test codegen for texture loads - Expr load4 = Call::make(Float(32, 4), Call::glsl_texture_load, - {string("buf"), - 0, - Broadcast::make(0, 4), - Broadcast::make(0, 4), - Ramp::make(0, 1, 4)}, - Call::Intrinsic); - check(load4, "vec4 $ = texture2D($buf, vec2(int(0), int(0)));\n"); - check(log(1.0f), "float $ = log(1.0);\n"); check(exp(1.0f), "float $ = exp(1.0);\n"); diff --git a/src/CodeGen_OpenGL_Dev.h b/src/CodeGen_OpenGL_Dev.h index 03cf43e1a1c8..b180b5e0ef12 100644 --- a/src/CodeGen_OpenGL_Dev.h +++ b/src/CodeGen_OpenGL_Dev.h @@ -134,7 +134,6 @@ class CodeGen_GLSL : public CodeGen_GLSLBase { void visit(const Allocate *) override; void visit(const Free *) override; - void visit(const Call *) override; void visit(const AssertStmt *) override; void visit(const Ramp *op) override; void visit(const Broadcast *) override; diff --git a/src/Deinterleave.cpp b/src/Deinterleave.cpp index f5bd78b41d97..9ca31a012ee2 100644 --- a/src/Deinterleave.cpp +++ b/src/Deinterleave.cpp @@ -320,14 +320,6 @@ class Deinterleaver : public IRGraphMutator { // Don't mutate scalars if (op->type.is_scalar()) { return op; - } else if (op->is_intrinsic(Call::glsl_texture_load)) { - // glsl_texture_load returns a result. 
Deinterleave by - // wrapping the call in a shuffle_vector - std::vector indices; - for (int i = 0; i < new_lanes; i++) { - indices.push_back(i * lane_stride + starting_lane); - } - return Shuffle::make({op}, indices); } else { // Vector calls are always parallel across the lanes, so we diff --git a/src/DeviceAPI.h b/src/DeviceAPI.h index ab132e091f8d..e75711592558 100644 --- a/src/DeviceAPI.h +++ b/src/DeviceAPI.h @@ -18,7 +18,6 @@ enum class DeviceAPI { Default_GPU, CUDA, OpenCL, - GLSL, OpenGLCompute, Metal, Hexagon, @@ -33,7 +32,6 @@ const DeviceAPI all_device_apis[] = {DeviceAPI::None, DeviceAPI::Default_GPU, DeviceAPI::CUDA, DeviceAPI::OpenCL, - DeviceAPI::GLSL, DeviceAPI::OpenGLCompute, DeviceAPI::Metal, DeviceAPI::Hexagon, diff --git a/src/DeviceArgument.cpp b/src/DeviceArgument.cpp index 5746958235b4..77f81c5001ff 100644 --- a/src/DeviceArgument.cpp +++ b/src/DeviceArgument.cpp @@ -40,9 +40,7 @@ std::vector HostClosure::arguments() { } void HostClosure::visit(const Call *op) { - if (op->is_intrinsic(Call::glsl_texture_load) || - op->is_intrinsic(Call::image_load) || - op->is_intrinsic(Call::glsl_texture_store) || + if (op->is_intrinsic(Call::image_load) || op->is_intrinsic(Call::image_store)) { // The argument to the call is either a StringImm or a broadcasted @@ -64,12 +62,10 @@ void HostClosure::visit(const Call *op) { MemoryType::GPUTexture : MemoryType::Auto; - if (op->is_intrinsic(Call::glsl_texture_load) || - op->is_intrinsic(Call::image_load)) { + if (op->is_intrinsic(Call::image_load)) { ref.read = true; ref.dimensions = (op->args.size() - 2) / 2; - } else if (op->is_intrinsic(Call::glsl_texture_store) || - op->is_intrinsic(Call::image_store)) { + } else if (op->is_intrinsic(Call::image_store)) { ref.write = true; ref.dimensions = op->args.size() - 3; } diff --git a/src/DeviceInterface.cpp b/src/DeviceInterface.cpp index 3285d7f3b3ac..4d72805e153a 100644 --- a/src/DeviceInterface.cpp +++ b/src/DeviceInterface.cpp @@ -96,8 +96,6 @@ const 
halide_device_interface_t *get_device_interface_for_device_api(DeviceAPI d name = "cuda"; } else if (d == DeviceAPI::OpenGLCompute) { name = "openglcompute"; - } else if (d == DeviceAPI::GLSL) { - name = "opengl"; } else if (d == DeviceAPI::HexagonDma) { name = "hexagon_dma"; } else if (d == DeviceAPI::D3D12Compute) { @@ -152,8 +150,6 @@ DeviceAPI get_default_device_api_for_target(const Target &target) { return DeviceAPI::CUDA; } else if (target.has_feature(Target::OpenGLCompute)) { return DeviceAPI::OpenGLCompute; - } else if (target.has_feature(Target::OpenGL)) { - return DeviceAPI::GLSL; } else if (target.has_feature(Target::HexagonDma)) { return DeviceAPI::HexagonDma; } else if (target.has_feature(Target::D3D12Compute)) { @@ -184,9 +180,6 @@ Expr make_device_interface_call(DeviceAPI device_api, MemoryType memory_type) { case DeviceAPI::Metal: interface_name = "halide_metal_device_interface"; break; - case DeviceAPI::GLSL: - interface_name = "halide_opengl_device_interface"; - break; case DeviceAPI::OpenGLCompute: interface_name = "halide_openglcompute_device_interface"; break; diff --git a/src/Func.cpp b/src/Func.cpp index 0cbb5a563c2d..cc6ab6d2ea21 100644 --- a/src/Func.cpp +++ b/src/Func.cpp @@ -2454,35 +2454,6 @@ Func &Func::gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, return *this; } -Func &Func::shader(const Var &x, const Var &y, const Var &c, DeviceAPI device_api) { - invalidate_cache(); - - reorder(c, x, y); - // GLSL outputs must be stored interleaved - reorder_storage(c, x, y); - - // TODO: Set appropriate constraints if this is the output buffer? 
- - Stage(func, func.definition(), 0).gpu_blocks(x, y, device_api); - - bool constant_bounds = false; - FuncSchedule &sched = func.schedule(); - for (size_t i = 0; i < sched.bounds().size(); i++) { - if (c.name() == sched.bounds()[i].var) { - constant_bounds = is_const(sched.bounds()[i].min) && - is_const(sched.bounds()[i].extent); - break; - } - } - user_assert(constant_bounds) - << "The color channel for image loops must have constant bounds, e.g., .bound(c, 0, 3).\n"; - return *this; -} - -Func &Func::glsl(const Var &x, const Var &y, const Var &c) { - return shader(x, y, c, DeviceAPI::GLSL).vectorize(c); -} - Func &Func::hexagon(const VarOrRVar &x) { invalidate_cache(); Stage(func, func.definition(), 0).hexagon(x); diff --git a/src/Func.h b/src/Func.h index a0c3e82e242a..d6cb36e2f35e 100644 --- a/src/Func.h +++ b/src/Func.h @@ -1974,16 +1974,6 @@ class Func { DeviceAPI device_api = DeviceAPI::Default_GPU); // @} - /** Schedule for execution using coordinate-based hardware api. - * GLSL is an example of this. Conceptually, this is - * similar to parallelization over 'x' and 'y' (since GLSL shaders compute - * individual output pixels in parallel) and vectorization over 'c' - * (since GLSL/RS implicitly vectorizes the color channel). */ - Func &shader(const Var &x, const Var &y, const Var &c, DeviceAPI device_api); - - /** Schedule for execution as GLSL kernel. */ - Func &glsl(const Var &x, const Var &y, const Var &c); - /** Schedule for execution on Hexagon. When a loop is marked with * Hexagon, that loop is executed on a Hexagon DSP. 
*/ Func &hexagon(const VarOrRVar &x = Var::outermost()); diff --git a/src/FuseGPUThreadLoops.cpp b/src/FuseGPUThreadLoops.cpp index 9faf0d7a41df..6b1798b25528 100644 --- a/src/FuseGPUThreadLoops.cpp +++ b/src/FuseGPUThreadLoops.cpp @@ -1442,10 +1442,6 @@ class FuseGPUThreadLoops : public IRMutator { using IRMutator::visit; Stmt visit(const For *op) override { - if (op->device_api == DeviceAPI::GLSL) { - return op; - } - user_assert(!(CodeGen_GPU_Dev::is_gpu_thread_var(op->name))) << "Loops over GPU thread variable: \"" << op->name << "\" is outside of any loop over a GPU block variable. " diff --git a/src/Generator.cpp b/src/Generator.cpp index e732fca13889..e973885ee53d 100644 --- a/src/Generator.cpp +++ b/src/Generator.cpp @@ -757,7 +757,7 @@ std::string halide_type_to_c_type(const Type &t) { int generate_filter_main_inner(int argc, char **argv, std::ostream &cerr) { const char kUsage[] = - "gengen \n" + "gengen\n" " [-g GENERATOR_NAME] [-f FUNCTION_NAME] [-o OUTPUT_DIR] [-r RUNTIME_NAME] [-d 1|0]\n" " [-e EMIT_OPTIONS] [-n FILE_BASE_NAME] [-p PLUGIN_NAME] [-s AUTOSCHEDULER_NAME]\n" " target=target-string[,target-string...] 
[generator_arg=value [...]]\n" diff --git a/src/Generator.h b/src/Generator.h index bc60b7ff4fb5..8edec2015961 100644 --- a/src/Generator.h +++ b/src/Generator.h @@ -2220,7 +2220,6 @@ class GeneratorOutputBase : public GIOBase { HALIDE_FORWARD_METHOD_CONST(Func, defined) HALIDE_FORWARD_METHOD(Func, fold_storage) HALIDE_FORWARD_METHOD(Func, fuse) - HALIDE_FORWARD_METHOD(Func, glsl) HALIDE_FORWARD_METHOD(Func, gpu) HALIDE_FORWARD_METHOD(Func, gpu_blocks) HALIDE_FORWARD_METHOD(Func, gpu_single_thread) @@ -2242,7 +2241,6 @@ class GeneratorOutputBase : public GIOBase { HALIDE_FORWARD_METHOD_CONST(Func, rvars) HALIDE_FORWARD_METHOD(Func, serial) HALIDE_FORWARD_METHOD(Func, set_estimate) - HALIDE_FORWARD_METHOD(Func, shader) HALIDE_FORWARD_METHOD(Func, specialize) HALIDE_FORWARD_METHOD(Func, specialize_fail) HALIDE_FORWARD_METHOD(Func, split) diff --git a/src/IR.cpp b/src/IR.cpp index edc293b6ce71..96b56ed01fed 100644 --- a/src/IR.cpp +++ b/src/IR.cpp @@ -597,9 +597,6 @@ const char *const intrinsic_op_names[] = { "div_round_to_zero", "dynamic_shuffle", "extract_mask_element", - "glsl_texture_load", - "glsl_texture_store", - "glsl_varying", "gpu_thread_barrier", "hvx_gather", "hvx_scatter", diff --git a/src/IR.h b/src/IR.h index ce45483882e6..da57626e7c42 100644 --- a/src/IR.h +++ b/src/IR.h @@ -509,9 +509,6 @@ struct Call : public ExprNode { div_round_to_zero, dynamic_shuffle, extract_mask_element, - glsl_texture_load, - glsl_texture_store, - glsl_varying, gpu_thread_barrier, hvx_gather, hvx_scatter, diff --git a/src/IRPrinter.cpp b/src/IRPrinter.cpp index 79318513ca31..240005b9c8a8 100644 --- a/src/IRPrinter.cpp +++ b/src/IRPrinter.cpp @@ -93,9 +93,6 @@ ostream &operator<<(ostream &out, const DeviceAPI &api) { case DeviceAPI::OpenGLCompute: out << ""; break; - case DeviceAPI::GLSL: - out << ""; - break; case DeviceAPI::Metal: out << ""; break; diff --git a/src/InjectOpenGLIntrinsics.cpp b/src/InjectOpenGLIntrinsics.cpp deleted file mode 100644 index 
b9e1d8c3fa46..000000000000 --- a/src/InjectOpenGLIntrinsics.cpp +++ /dev/null @@ -1,111 +0,0 @@ -#include "InjectOpenGLIntrinsics.h" -#include "CodeGen_GPU_Dev.h" -#include "FuseGPUThreadLoops.h" -#include "IRMutator.h" -#include "IROperator.h" -#include "Scope.h" -#include "Substitute.h" - -namespace Halide { -namespace Internal { - -using std::string; -using std::vector; - -namespace { - -/** Normalizes image loads/stores and produces glsl_texture_load/stores. */ -class InjectOpenGLIntrinsics : public IRMutator { -public: - InjectOpenGLIntrinsics() = default; - Scope scope; - bool inside_kernel_loop = false; - -private: - using IRMutator::visit; - - Expr visit(const Call *call) override { - if (call->is_intrinsic(Call::image_load)) { - vector call_args = call->args; - // - // Create - // glsl_texture_load("name", - // name.buffer, - // (x - x_min + 0.5)/x_extent, - // (y - y_min + 0.5)/y_extent, - // c) - // from - // image_load("name", - // name.buffer, - // x - x_min, x_extent, - // y - y_min, y_extent, - // c - c_min, c_extent - // ) - // - int dims = (call_args.size() - 2) / 2; - internal_assert(dims >= 1 && dims <= 3); - - vector args(5); - args[0] = call_args[0]; // "name" - args[1] = call_args[1]; // name.buffer - - // Normalize first two coordinates. - for (int i = 0; i < std::min(dims, 2); i++) { - int to_index = 2 + i; - int from_index = 2 + i * 2; - args[to_index] = - (Cast::make(Float(32), mutate(call_args[from_index])) + 0.5f) / - mutate(call_args[from_index + 1]); - } - - if (dims < 3) { - args[3] = FloatImm::make(Float(32), 0.5f); - args[4] = IntImm::make(Int(32), 0); - } else { - // Confirm that user explicitly specified constant value for min - // value of c dimension for ImageParams accessed by GLSL-based filters. 
- if (call->param.defined()) { - bool const_min_constraint = - call->param.min_constraint(2).defined() && - is_const(call->param.min_constraint(2)); - user_assert(const_min_constraint) - << "GLSL: Requires minimum for c-dimension set to constant " - << "for ImageParam '" << args[0] << "'. " - << "Call set_min(2, min) or set_bounds(2, min, extent) to set.\n"; - } - - Expr c_coordinate = mutate(call_args[2 + 2 * 2]); - args[4] = c_coordinate; - } - - return Call::make(call->type, Call::glsl_texture_load, - vector(&args[0], &args[5]), - Call::Intrinsic, FunctionPtr(), 0, - call->image, call->param); - } else if (call->is_intrinsic(Call::image_store)) { - user_assert(call->args.size() == 6) - << "GLSL stores require three coordinates.\n"; - - // Create - // gl_texture_store(name, name.buffer, x, y, c, value) - // out of - // image_store(name, name.buffer, x, y, c, value) - vector args(call->args); - args[5] = mutate(call->args[5]); // mutate value - return Call::make(call->type, Call::glsl_texture_store, - args, Call::Intrinsic); - } else { - return IRMutator::visit(call); - } - } -}; - -} // namespace - -Stmt inject_opengl_intrinsics(const Stmt &s) { - InjectOpenGLIntrinsics gl; - return gl.mutate(s); -} - -} // namespace Internal -} // namespace Halide diff --git a/src/InjectOpenGLIntrinsics.h b/src/InjectOpenGLIntrinsics.h deleted file mode 100644 index 3fcf9875d024..000000000000 --- a/src/InjectOpenGLIntrinsics.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef HALIDE_INJECT_OPENGL_INTRINSICS_H -#define HALIDE_INJECT_OPENGL_INTRINSICS_H - -/** \file - * Defines the lowering pass that injects texture loads and texture - * stores for opengl. - */ - -#include "Expr.h" - -namespace Halide { -namespace Internal { - -/** Take a statement with for kernel for loops and turn loads and - * stores inside the loops into OpenGL texture load and store - * intrinsics. Should only be run when the OpenGL target is active. 
*/ -Stmt inject_opengl_intrinsics(const Stmt &s); - -} // namespace Internal -} // namespace Halide - -#endif diff --git a/src/JITModule.cpp b/src/JITModule.cpp index dd67ca1d2dcd..0e3837ec6913 100644 --- a/src/JITModule.cpp +++ b/src/JITModule.cpp @@ -624,14 +624,12 @@ enum RuntimeKind { OpenCL, Metal, CUDA, - OpenGL, // NOTE: this feature is deprecated and will be removed in Halide 12. OpenGLCompute, Hexagon, D3D12Compute, OpenCLDebug, MetalDebug, CUDADebug, - OpenGLDebug, // NOTE: this feature is deprecated and will be removed in Halide 12. OpenGLComputeDebug, HexagonDebug, D3D12ComputeDebug, @@ -668,7 +666,6 @@ JITModule &make_module(llvm::Module *for_module, Target target, one_gpu.set_feature(Target::Metal, false); one_gpu.set_feature(Target::CUDA, false); one_gpu.set_feature(Target::HVX, false); - one_gpu.set_feature(Target::OpenGL, false); one_gpu.set_feature(Target::OpenGLCompute, false); one_gpu.set_feature(Target::D3D12Compute, false); string module_name; @@ -702,17 +699,6 @@ JITModule &make_module(llvm::Module *for_module, Target target, one_gpu.set_feature(Target::CUDA); module_name += "cuda"; break; - case OpenGLDebug: - one_gpu.set_feature(Target::Debug); - one_gpu.set_feature(Target::OpenGL); - module_name = "debug_opengl"; - load_opengl(one_gpu.has_feature(Target::EGL)); - break; - case OpenGL: - one_gpu.set_feature(Target::OpenGL); - module_name += "opengl"; - load_opengl(one_gpu.has_feature(Target::EGL)); - break; case OpenGLComputeDebug: one_gpu.set_feature(Target::Debug); one_gpu.set_feature(Target::OpenGLCompute); @@ -874,13 +860,6 @@ std::vector JITSharedRuntime::get(llvm::Module *for_module, const Tar result.push_back(m); } } - if (target.has_feature(Target::OpenGL)) { - auto kind = target.has_feature(Target::Debug) ? 
OpenGLDebug : OpenGL; - JITModule m = make_module(for_module, target, kind, result, create); - if (m.compiled()) { - result.push_back(m); - } - } if (target.has_feature(Target::OpenGLCompute)) { auto kind = target.has_feature(Target::Debug) ? OpenGLComputeDebug : OpenGLCompute; JITModule m = make_module(for_module, target, kind, result, create); diff --git a/src/LICM.cpp b/src/LICM.cpp index 64360fd64b9d..bb80691f79b0 100644 --- a/src/LICM.cpp +++ b/src/LICM.cpp @@ -246,9 +246,6 @@ class LICM : public IRMutator { if (old_in_gpu_loop && in_gpu_loop) { // Don't lift lets to in-between gpu blocks/threads return IRMutator::visit(op); - } else if (op->device_api == DeviceAPI::GLSL) { - // GLSL uses magic names for varying things. Just skip LICM. - return IRMutator::visit(op); } else { // Lift invariants diff --git a/src/LLVM_Runtime_Linker.cpp b/src/LLVM_Runtime_Linker.cpp index d715ed4e984d..75b5d5c212d4 100644 --- a/src/LLVM_Runtime_Linker.cpp +++ b/src/LLVM_Runtime_Linker.cpp @@ -105,7 +105,6 @@ DECLARE_CPP_INITMOD(module_jit_ref_count) DECLARE_CPP_INITMOD(msan) DECLARE_CPP_INITMOD(msan_stubs) DECLARE_CPP_INITMOD(opencl) -DECLARE_CPP_INITMOD(opengl) DECLARE_CPP_INITMOD(openglcompute) DECLARE_CPP_INITMOD(opengl_egl_context) DECLARE_CPP_INITMOD(opengl_glx_context) @@ -1091,22 +1090,6 @@ std::unique_ptr get_initial_module_for_target(Target t, llvm::LLVM modules.push_back(get_initmod_opencl(c, bits_64, debug)); } } - if (t.has_feature(Target::OpenGL)) { - modules.push_back(get_initmod_opengl(c, bits_64, debug)); - if (t.os == Target::Linux) { - if (t.has_feature(Target::EGL)) { - modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug)); - } else { - modules.push_back(get_initmod_opengl_glx_context(c, bits_64, debug)); - } - } else if (t.os == Target::OSX) { - modules.push_back(get_initmod_osx_opengl_context(c, bits_64, debug)); - } else if (t.os == Target::Android) { - modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug)); - } else { - // 
You're on your own to provide definitions of halide_opengl_get_proc_address and halide_opengl_create_context - } - } if (t.has_feature(Target::OpenGLCompute)) { modules.push_back(get_initmod_openglcompute(c, bits_64, debug)); if (t.os == Target::Android) { diff --git a/src/Lower.cpp b/src/Lower.cpp index a25d18de30b3..35f2096addd2 100644 --- a/src/Lower.cpp +++ b/src/Lower.cpp @@ -34,7 +34,6 @@ #include "IRPrinter.h" #include "InferArguments.h" #include "InjectHostDevBufferCopies.h" -#include "InjectOpenGLIntrinsics.h" #include "Inline.h" #include "LICM.h" #include "LoopCarry.h" @@ -68,7 +67,6 @@ #include "UnpackBuffers.h" #include "UnrollLoops.h" #include "UnsafePromises.h" -#include "VaryingAttributes.h" #include "VectorizeLoops.h" #include "WrapCalls.h" @@ -205,7 +203,6 @@ Module lower(const vector &output_funcs, bool will_inject_host_copies = (t.has_gpu_feature() || t.has_feature(Target::OpenGLCompute) || - t.has_feature(Target::OpenGL) || t.has_feature(Target::HexagonDma) || (t.arch != Target::Hexagon && (t.has_feature(Target::HVX)))); @@ -257,8 +254,7 @@ Module lower(const vector &output_funcs, // OpenGL relies on GPU var canonicalization occurring before // storage flattening. 
if (t.has_gpu_feature() || - t.has_feature(Target::OpenGLCompute) || - t.has_feature(Target::OpenGL)) { + t.has_feature(Target::OpenGLCompute)) { debug(1) << "Canonicalizing GPU var names...\n"; s = canonicalize_gpu_vars(s); debug(2) << "Lowering after canonicalizing GPU var names:\n" @@ -312,13 +308,6 @@ Module lower(const vector &output_funcs, << s << "\n\n"; } - if (t.has_feature(Target::OpenGL)) { - debug(1) << "Injecting OpenGL texture intrinsics...\n"; - s = inject_opengl_intrinsics(s); - debug(2) << "Lowering after OpenGL intrinsics:\n" - << s << "\n\n"; - } - debug(1) << "Simplifying...\n"; s = simplify(s); s = unify_duplicate_lets(s); @@ -416,18 +405,6 @@ Module lower(const vector &output_funcs, debug(1) << "Simplifying...\n"; s = common_subexpression_elimination(s); - if (t.has_feature(Target::OpenGL)) { - debug(1) << "Detecting varying attributes...\n"; - s = find_linear_expressions(s); - debug(2) << "Lowering after detecting varying attributes:\n" - << s << "\n\n"; - - debug(1) << "Moving varying attribute expressions out of the shader...\n"; - s = setup_gpu_vertex_buffer(s); - debug(2) << "Lowering after removing varying attributes:\n" - << s << "\n\n"; - } - debug(1) << "Lowering unsafe promises...\n"; s = lower_unsafe_promises(s, t); debug(2) << "Lowering after lowering unsafe promises:\n" diff --git a/src/Module.cpp b/src/Module.cpp index 618c8ea7eb39..119635535e69 100644 --- a/src/Module.cpp +++ b/src/Module.cpp @@ -557,10 +557,6 @@ std::map Module::get_metadata_name_map() const { void Module::compile(const std::map &output_files) const { validate_outputs(output_files); - if (target().has_feature(Target::OpenGL)) { - user_warning << "WARNING: OpenGL is deprecated in Halide 11 and will be removed in Halide 12.\n"; - } - // output stmt and html prior to resolving submodules. We need to // clear the output after writing it, otherwise the output will // be overwritten by recursive calls after submodules are resolved. 
diff --git a/src/PartitionLoops.cpp b/src/PartitionLoops.cpp index dfe9a30d1931..0a5381972000 100644 --- a/src/PartitionLoops.cpp +++ b/src/PartitionLoops.cpp @@ -496,12 +496,6 @@ class PartitionLoops : public IRMutator { return IRMutator::visit(op); } - // We shouldn't partition GLSL loops - they have control-flow - // constraints. - if (op->device_api == DeviceAPI::GLSL) { - return op; - } - // Find simplifications in this loop body FindSimplifications finder(op->name); body.accept(&finder); @@ -777,11 +771,6 @@ class RenormalizeGPULoops : public IRMutator { vector> lifted_lets; Stmt visit(const For *op) override { - if (op->device_api == DeviceAPI::GLSL) { - // The partitioner did not enter GLSL loops - return op; - } - bool old_in_gpu_loop = in_gpu_loop; Stmt stmt; diff --git a/src/Pipeline.cpp b/src/Pipeline.cpp index 349a94c22ea2..b46b879b88a6 100644 --- a/src/Pipeline.cpp +++ b/src/Pipeline.cpp @@ -1075,10 +1075,6 @@ void Pipeline::realize(RealizationArg outputs, const Target &t, Target target = t; user_assert(defined()) << "Can't realize an undefined Pipeline\n"; - if (t.has_feature(Target::OpenGL)) { - user_warning << "WARNING: OpenGL is deprecated in Halide 11 and will be removed in Halide 12.\n"; - } - debug(2) << "Realizing Pipeline for " << target << "\n"; if (target.has_unknowns()) { diff --git a/src/StorageFlattening.cpp b/src/StorageFlattening.cpp index d78662fe73b2..2ed7fa278939 100644 --- a/src/StorageFlattening.cpp +++ b/src/StorageFlattening.cpp @@ -39,8 +39,7 @@ class FlattenDimensions : public IRMutator { set outputs; set textures; const Target &target; - Scope<> realizations, shader_scope_realizations; - bool in_shader = false; + Scope<> realizations; bool in_gpu = false; Expr make_shape_var(string name, const string &field, size_t dim, @@ -110,10 +109,6 @@ class FlattenDimensions : public IRMutator { Stmt visit(const Realize *op) override { realizations.push(op->name); - if (in_shader) { - shader_scope_realizations.push(op->name); - } - if
(op->memory_type == MemoryType::GPUTexture) { textures.insert(op->name); debug(2) << "found texture " << op->name << "\n"; @@ -131,10 +126,6 @@ class FlattenDimensions : public IRMutator { realizations.pop(op->name); - if (in_shader) { - shader_scope_realizations.pop(op->name); - } - // The allocation extents of the function taken into account of // the align_storage directives. It is only used to determine the // host allocation size and the strides in halide_buffer_t objects (which @@ -247,19 +238,7 @@ class FlattenDimensions : public IRMutator { } Expr value = mutate(op->values[0]); - if (in_shader && !shader_scope_realizations.contains(op->name)) { - user_assert(op->args.size() == 3) - << "Image stores require three coordinates.\n"; - Expr buffer_var = - Variable::make(type_of(), op->name + ".buffer", output_buf); - vector args = { - op->name, buffer_var, - op->args[0], op->args[1], op->args[2], - value}; - Expr store = Call::make(value.type(), Call::image_store, - args, Call::Intrinsic); - return Evaluate::make(store); - } else if (in_gpu && textures.count(op->name)) { + if (in_gpu && textures.count(op->name)) { Expr buffer_var = Variable::make(type_of(), op->name + ".buffer", output_buf); vector args(2); @@ -296,7 +275,7 @@ class FlattenDimensions : public IRMutator { internal_assert(op->value_index == 0); - if ((in_shader && !shader_scope_realizations.contains(op->name)) || (in_gpu && textures.count(op->name))) { + if (in_gpu && textures.count(op->name)) { ReductionDomain rdom; Expr buffer_var = Variable::make(type_of(), op->name + ".buffer", @@ -396,19 +375,12 @@ class FlattenDimensions : public IRMutator { } Stmt visit(const For *op) override { - bool old_in_shader = in_shader; bool old_in_gpu = in_gpu; - if ((op->for_type == ForType::GPUBlock || - op->for_type == ForType::GPUThread) && - op->device_api == DeviceAPI::GLSL) { - in_shader = true; - } if (op->for_type == ForType::GPUBlock || op->for_type == ForType::GPUThread) { in_gpu = true; } Stmt stmt = 
IRMutator::visit(op); - in_shader = old_in_shader; in_gpu = old_in_gpu; return stmt; } diff --git a/src/Target.cpp b/src/Target.cpp index ac1e6548ce8b..c9ea844729c2 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -330,7 +330,6 @@ const std::map feature_name_map = { {"cl_doubles", Target::CLDoubles}, {"cl_half", Target::CLHalf}, {"cl_atomics64", Target::CLAtomics64}, - {"opengl", Target::OpenGL}, {"openglcompute", Target::OpenGLCompute}, {"egl", Target::EGL}, {"user_context", Target::UserContext}, @@ -661,7 +660,7 @@ bool Target::supported() const { bad |= has_feature(Target::Metal); #endif #if !defined(WITH_OPENGL) - bad |= has_feature(Target::OpenGL) || has_feature(Target::OpenGLCompute); + bad |= has_feature(Target::OpenGLCompute); #endif #if !defined(WITH_D3D12) bad |= has_feature(Target::D3D12Compute); @@ -774,14 +773,12 @@ bool Target::supports_type(const Type &t) const { if (t.bits() == 64) { if (t.is_float()) { return !has_feature(Metal) && - !has_feature(OpenGL) && !has_feature(OpenGLCompute) && !has_feature(D3D12Compute) && (!has_feature(Target::OpenCL) || has_feature(Target::CLDoubles)); } else { return (!has_feature(Metal) && !has_feature(OpenGLCompute) && - !has_feature(OpenGL) && !has_feature(D3D12Compute)); } } @@ -854,9 +851,6 @@ DeviceAPI Target::get_required_device_api() const { if (has_feature(Target::OpenCL)) { return DeviceAPI::OpenCL; } - if (has_feature(Target::OpenGL)) { - return DeviceAPI::GLSL; - } if (has_feature(Target::OpenGLCompute)) { return DeviceAPI::OpenGLCompute; } @@ -869,8 +863,6 @@ Target::Feature target_feature_for_device_api(DeviceAPI api) { return Target::CUDA; case DeviceAPI::OpenCL: return Target::OpenCL; - case DeviceAPI::GLSL: - return Target::OpenGL; case DeviceAPI::OpenGLCompute: return Target::OpenGLCompute; case DeviceAPI::Metal: @@ -957,7 +949,6 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result) Metal, NoNEON, OpenCL, - OpenGL, OpenGLCompute, // These features are actually 
intersection-y, but because targets only record the _highest_, @@ -1123,7 +1114,6 @@ void target_test() { {{"x86-64-linux-cuda", "x86-64-linux", "x86-64-linux-cuda"}}, {{"x86-64-linux-cuda-cuda_capability_50", "x86-64-linux-cuda", "x86-64-linux-cuda"}}, {{"x86-64-linux-cuda-cuda_capability_50", "x86-64-linux-cuda-cuda_capability_30", "x86-64-linux-cuda-cuda_capability_30"}}, - {{"x86-64-linux-cuda", "x86-64-linux-opengl", "x86-64-linux-cuda-opengl"}}, {{"hexagon-32-qurt-hvx_v65", "hexagon-32-qurt-hvx_v62", "hexagon-32-qurt-hvx_v62"}}, {{"hexagon-32-qurt-hvx_v62", "hexagon-32-qurt", "hexagon-32-qurt"}}, {{"hexagon-32-qurt-hvx_v62-hvx", "hexagon-32-qurt", ""}}, diff --git a/src/Target.h b/src/Target.h index 2ae94a71c3a0..84aeb07e0e87 100644 --- a/src/Target.h +++ b/src/Target.h @@ -82,7 +82,6 @@ struct Target { CLDoubles = halide_target_feature_cl_doubles, CLHalf = halide_target_feature_cl_half, CLAtomics64 = halide_target_feature_cl_atomic64, - OpenGL = halide_target_feature_opengl, // NOTE: this feature is deprecated and will be removed in Halide 12. 
OpenGLCompute = halide_target_feature_openglcompute, EGL = halide_target_feature_egl, UserContext = halide_target_feature_user_context, diff --git a/src/VaryingAttributes.cpp b/src/VaryingAttributes.cpp deleted file mode 100644 index df9ebf94f6b1..000000000000 --- a/src/VaryingAttributes.cpp +++ /dev/null @@ -1,1389 +0,0 @@ -#include "VaryingAttributes.h" - -#include - -#include "CSE.h" -#include "CodeGen_GPU_Dev.h" -#include "IR.h" -#include "IRMutator.h" -#include "IROperator.h" -#include "Simplify.h" - -namespace Halide { -namespace Internal { - -namespace { - -Stmt make_block(Stmt first, Stmt rest) { - if (first.defined() && rest.defined()) { - return Block::make(first, rest); - } else if (first.defined()) { - return first; - } else { - return rest; - } -} - -// Find expressions that we can evaluate with interpolation hardware in the GPU -// -// This visitor keeps track of the "order" of the expression in terms of the -// specified variables. The order value 0 means that the expression is contant; -// order value 1 means that it is linear in terms of only one variable, check -// the member found to determine which; order value 2 means non-linear, it -// could be disqualified due to being quadratic, bilinear or the result of an -// unknown function. -class FindLinearExpressions : public IRMutator { -protected: - using IRMutator::visit; - - bool in_glsl_loops = false; - - Expr tag_linear_expression(Expr e, const std::string &name = unique_name('a')) { - - internal_assert(name.length() > 0); - - if (total_found >= max_expressions) { - return e; - } - - // Wrap the expression with an intrinsic to tag that it is a varying - // attribute. 
These tagged variables will be pulled out of the fragment - // shader during a subsequent pass - Expr intrinsic = Call::make(e.type(), Call::glsl_varying, - {name + ".varying", e}, - Call::Intrinsic); - ++total_found; - - return intrinsic; - } - - Expr visit(const Call *op) override { - std::vector new_args = op->args; - - // Check to see if this call is a load - if (op->is_intrinsic(Call::glsl_texture_load)) { - // Check if the texture coordinate arguments are linear wrt the GPU - // loop variables - internal_assert(!loop_vars.empty()) << "No GPU loop variables found at texture load\n"; - - // Iterate over the texture coordinate arguments - for (int i = 2; i != 4; ++i) { - new_args[i] = mutate(op->args[i]); - if (order == 1) { - new_args[i] = tag_linear_expression(new_args[i]); - } - } - } else if (op->is_intrinsic(Call::glsl_texture_store)) { - // Check if the value expression is linear wrt the loop variables - internal_assert(!loop_vars.empty()) << "No GPU loop variables found at texture store\n"; - - // The value is the 5th argument to the intrinsic - new_args[5] = mutate(new_args[5]); - if (order == 1) { - new_args[5] = tag_linear_expression(new_args[5]); - } - } - - // The texture lookup itself is counted as a non-linear operation - order = 2; - return Call::make(op->type, op->name, new_args, op->call_type, - op->func, op->value_index, op->image, op->param); - } - - Expr visit(const Let *op) override { - Expr mutated_value = mutate(op->value); - int value_order = order; - - ScopedBinding bind(scope, op->name, order); - - Expr mutated_body = mutate(op->body); - - if ((value_order == 1) && (total_found < max_expressions)) { - // Wrap the let value with a varying tag - mutated_value = Call::make(mutated_value.type(), Call::glsl_varying, - {op->name + ".varying", mutated_value}, - Call::Intrinsic); - ++total_found; - } - - return Let::make(op->name, mutated_value, mutated_body); - } - - Stmt visit(const For *op) override { - bool old_in_glsl_loops = 
in_glsl_loops; - bool kernel_loop = op->device_api == DeviceAPI::GLSL; - bool within_kernel_loop = !kernel_loop && in_glsl_loops; - // Check if the loop variable is a GPU variable thread variable and for GLSL - if (kernel_loop) { - loop_vars.push_back(op->name); - in_glsl_loops = true; - } else if (within_kernel_loop) { - // The inner loop variable is non-linear w.r.t the glsl pixel coordinate. - scope.push(op->name, 2); - } - - Stmt mutated_body = mutate(op->body); - - if (kernel_loop) { - loop_vars.pop_back(); - } else if (within_kernel_loop) { - scope.pop(op->name); - } - - in_glsl_loops = old_in_glsl_loops; - - if (mutated_body.same_as(op->body)) { - return op; - } else { - return For::make(op->name, op->min, op->extent, op->for_type, op->device_api, mutated_body); - } - } - - Expr visit(const Variable *op) override { - if (std::find(loop_vars.begin(), loop_vars.end(), op->name) != loop_vars.end()) { - order = 1; - } else if (scope.contains(op->name)) { - order = scope.get(op->name); - } else { - // If the variable is not found in scope, then we assume it is - // constant in terms of the independent variables. 
- order = 0; - } - return op; - } - - Expr visit(const IntImm *op) override { - order = 0; - return op; - } - Expr visit(const UIntImm *op) override { - order = 0; - return op; - } - Expr visit(const FloatImm *op) override { - order = 0; - return op; - } - Expr visit(const StringImm *op) override { - order = 0; - return op; - } - - Expr visit(const Cast *op) override { - - Expr mutated_value = mutate(op->value); - int value_order = order; - - // We can only interpolate float values, disqualify the expression if - // this is a cast to a different type - if (order && (!op->type.is_float())) { - order = 2; - } - - if ((order > 1) && (value_order == 1)) { - mutated_value = tag_linear_expression(mutated_value); - } - - return Cast::make(op->type, mutated_value); - } - - // Add and subtract do not make the expression non-linear, if it is already - // linear or constant - template - Expr visit_binary_linear(T *op) { - Expr a = mutate(op->a); - unsigned int order_a = order; - Expr b = mutate(op->b); - unsigned int order_b = order; - - order = std::max(order_a, order_b); - - // If the whole expression is greater than linear, check to see if - // either argument is linear and if so, add it to a candidate list - if ((order > 1) && (order_a == 1)) { - a = tag_linear_expression(a); - } - if ((order > 1) && (order_b == 1)) { - b = tag_linear_expression(b); - } - - return T::make(a, b); - } - - Expr visit(const Add *op) override { - return visit_binary_linear(op); - } - Expr visit(const Sub *op) override { - return visit_binary_linear(op); - } - - // Multiplying increases the order of the expression, possibly making it - // non-linear - Expr visit(const Mul *op) override { - Expr a = mutate(op->a); - unsigned int order_a = order; - Expr b = mutate(op->b); - unsigned int order_b = order; - - order = order_a + order_b; - - // If the whole expression is greater than linear, check to see if - // either argument is linear and if so, add it to a candidate list - if ((order > 1) && 
(order_a == 1)) { - a = tag_linear_expression(a); - } - if ((order > 1) && (order_b == 1)) { - b = tag_linear_expression(b); - } - - return Mul::make(a, b); - } - - // Dividing is either multiplying by a constant, or makes the result - // non-linear (i.e. order -1) - Expr visit(const Div *op) override { - Expr a = mutate(op->a); - unsigned int order_a = order; - Expr b = mutate(op->b); - unsigned int order_b = order; - - if (order_a && !order_b) { - // Case: x / c - order = order_a; - } else if (!order_a && order_b) { - // Case: c / x - order = 2; - } else { - order = order_a + order_b; - } - - if ((order > 1) && (order_a == 1)) { - a = tag_linear_expression(a); - } - if ((order > 1) && (order_b == 1)) { - b = tag_linear_expression(b); - } - - return Div::make(a, b); - } - - // For other binary operators, if either argument is non-constant, then the - // whole expression is non-linear - template - Expr visit_binary(T *op) { - - Expr a = mutate(op->a); - unsigned int order_a = order; - Expr b = mutate(op->b); - unsigned int order_b = order; - - if (order_a || order_b) { - order = 2; - } - - if ((order > 1) && (order_a == 1)) { - a = tag_linear_expression(a); - } - if ((order > 1) && (order_b == 1)) { - b = tag_linear_expression(b); - } - - return T::make(a, b); - } - - Expr visit(const Mod *op) override { - return visit_binary(op); - } - - // Break the expression into a piecewise function, if the expressions are - // linear, we treat the piecewise behavior specially during codegen - - // Once this is done, Min and Max should call visit_binary_linear and the code - // in setup_mesh will handle piecewise linear behavior introduced by these - // expressions - Expr visit(const Min *op) override { - return visit_binary(op); - } - Expr visit(const Max *op) override { - return visit_binary(op); - } - - Expr visit(const EQ *op) override { - return visit_binary(op); - } - Expr visit(const NE *op) override { - return visit_binary(op); - } - Expr visit(const LT *op) override { 
- return visit_binary(op); - } - Expr visit(const LE *op) override { - return visit_binary(op); - } - Expr visit(const GT *op) override { - return visit_binary(op); - } - Expr visit(const GE *op) override { - return visit_binary(op); - } - Expr visit(const And *op) override { - return visit_binary(op); - } - Expr visit(const Or *op) override { - return visit_binary(op); - } - - Expr visit(const Not *op) override { - Expr a = mutate(op->a); - unsigned int order_a = order; - - if (order_a) { - order = 2; - } - - return Not::make(a); - } - - Expr visit(const Broadcast *op) override { - Expr a = mutate(op->value); - - if (order == 1) { - a = tag_linear_expression(a); - } - - if (order) { - order = 2; - } - - return Broadcast::make(a, op->lanes); - } - - Expr visit(const Select *op) override { - - // If either the true expression or the false expression is non-linear - // in terms of the loop variables, then the select expression might - // evaluate to a non-linear expression and is disqualified. - - // If both are either linear or constant, and the condition expression - // is constant with respect to the loop variables, then either the true - // or false expression will be evaluated across the whole loop domain, - // and the select expression is linear. Otherwise, the expression is - // disqualified. - - // The condition expression must be constant (order == 0) with respect - // to the loop variables. - Expr mutated_condition = mutate(op->condition); - int condition_order = (order != 0) ? 
2 : 0; - - Expr mutated_true_value = mutate(op->true_value); - int true_value_order = order; - - Expr mutated_false_value = mutate(op->false_value); - int false_value_order = order; - - order = std::max(std::max(condition_order, true_value_order), false_value_order); - - if ((order > 1) && (condition_order == 1)) { - mutated_condition = tag_linear_expression(mutated_condition); - } - if ((order > 1) && (true_value_order == 1)) { - mutated_true_value = tag_linear_expression(mutated_true_value); - } - if ((order > 1) && (false_value_order == 1)) { - mutated_false_value = tag_linear_expression(mutated_false_value); - } - - return Select::make(mutated_condition, mutated_true_value, mutated_false_value); - } - -public: - std::vector loop_vars; - - Scope scope; - - unsigned int order; - bool found; - - unsigned int total_found = 0; - - // This parameter controls the maximum number of linearly varying - // expressions halide will pull out of the fragment shader and evaluate per - // vertex, and allow the GPU to linearly interpolate across the domain. For - // OpenGL ES 2.0 we can pass 16 vec4 varying attributes, or 64 scalars. Two - // scalar slots are used by boilerplate code to pass pixel coordinates. - const unsigned int max_expressions = 62; - - FindLinearExpressions() = default; -}; - -} // namespace - -Stmt find_linear_expressions(const Stmt &s) { - - return FindLinearExpressions().mutate(s); -} - -namespace { - -// This visitor produces a map containing name and expression pairs from varying -// tagged intrinsics -class FindVaryingAttributeTags : public IRVisitor { -public: - FindVaryingAttributeTags(std::map &varyings_) - : varyings(varyings_) { - } - - using IRVisitor::visit; - - void visit(const Call *op) override { - if (op->is_intrinsic(Call::glsl_varying)) { - std::string name = op->args[0].as()->value; - varyings[name] = op->args[1]; - } - IRVisitor::visit(op); - } - - std::map &varyings; -}; - -// This visitor removes glsl_varying intrinsics. 
-class RemoveVaryingAttributeTags : public IRMutator { -public: - using IRMutator::visit; - - Expr visit(const Call *op) override { - if (op->is_intrinsic(Call::glsl_varying)) { - // Replace the call expression with its wrapped argument expression - return op->args[1]; - } else { - return IRMutator::visit(op); - } - } -}; - -} // namespace - -Stmt remove_varying_attributes(const Stmt &s) { - return RemoveVaryingAttributeTags().mutate(s); -} - -namespace { - -// This visitor removes glsl_varying intrinsics and replaces them with -// variables. After this visitor is called, the varying attribute expressions -// will no longer appear in the IR tree, only variables with the .varying tag -// will remain. -class ReplaceVaryingAttributeTags : public IRMutator { -public: - using IRMutator::visit; - - Expr visit(const Call *op) override { - if (op->is_intrinsic(Call::glsl_varying)) { - // Replace the intrinsic tag wrapper with a variable the variable - // name ends with the tag ".varying" - std::string name = op->args[0].as()->value; - - internal_assert(ends_with(name, ".varying")); - - return Variable::make(op->type, name); - } else { - return IRMutator::visit(op); - } - } -}; - -} // namespace - -Stmt replace_varying_attributes(const Stmt &s) { - return ReplaceVaryingAttributeTags().mutate(s); -} - -namespace { - -// This visitor produces a set of variable names that are tagged with -// ".varying". -class FindVaryingAttributeVars : public IRVisitor { -public: - using IRVisitor::visit; - - void visit(const Variable *op) override { - if (ends_with(op->name, ".varying")) { - variables.insert(op->name); - } - } - - std::set variables; -}; - -} // namespace - -// Remove varying attributes from the varying's map if they do not appear in the -// loop_stmt because they were simplified away. 
-void prune_varying_attributes(const Stmt &loop_stmt, std::map &varying) { - FindVaryingAttributeVars find; - loop_stmt.accept(&find); - - std::vector remove_list; - - for (const std::pair &i : varying) { - const std::string &name = i.first; - if (find.variables.find(name) == find.variables.end()) { - debug(2) << "Removed varying attribute " << name << "\n"; - remove_list.push_back(name); - } - } - - for (const std::string &i : remove_list) { - varying.erase(i); - } -} - -namespace { - -// This visitor changes the type of variables tagged with .varying to float, -// since GLSL will only interpolate floats. In the case that the type of the -// varying attribute was integer, the interpolated float value is snapped to the -// integer grid and cast to the integer type. This case occurs with coordinate -// expressions where the integer loop variables are manipulated without being -// converted to floating point. In other cases, like an affine transformation of -// image coordinates, the loop variables are cast to floating point within the -// interpolated expression. -class CastVaryingVariables : public IRMutator { -protected: - using IRMutator::visit; - - Expr visit(const Variable *op) override { - if ((ends_with(op->name, ".varying")) && (op->type != Float(32))) { - // The incoming variable will be float type because GLSL only - // interpolates floats - Expr v = Variable::make(Float(32), op->name); - - // If the varying attribute expression that this variable replaced - // was integer type, snap the interpolated floating point variable - // back to the integer grid. - return Cast::make(op->type, floor(v + 0.5f)); - } else { - // Otherwise, the variable keeps its float type. - return op; - } - } -}; - -// This visitor casts the named variables to float, and then propagates the -// float type through the expression. 
The variable is offset by 0.5f -class CastVariablesToFloatAndOffset : public IRMutator { -protected: - using IRMutator::visit; - - Expr visit(const Variable *op) override { - - // Check to see if the variable matches a loop variable name - if (std::find(names.begin(), names.end(), op->name) != names.end()) { - // This case is used by integer type loop variables. They are cast - // to float and offset. - return Expr(op) - 0.5f; - - } else if (scope.contains(op->name) && (op->type != scope.get(op->name).type())) { - // Otherwise, check to see if it is defined by a modified let - // expression and if so, change the type of the variable to match - // the modified expression - return Variable::make(scope.get(op->name).type(), op->name); - } else { - return op; - } - } - - Type float_type(const Expr &e) { - return Float(e.type().bits(), e.type().lanes()); - } - - template - Expr visit_binary_op(const T *op) { - Expr mutated_a = mutate(op->a); - Expr mutated_b = mutate(op->b); - - bool a_float = mutated_a.type().is_float(); - bool b_float = mutated_b.type().is_float(); - - // If either argument is a float, then make sure both are float - if (a_float || b_float) { - if (!a_float) { - mutated_a = Cast::make(float_type(op->b), mutated_a); - } - if (!b_float) { - mutated_b = Cast::make(float_type(op->a), mutated_b); - } - } - - return T::make(mutated_a, mutated_b); - } - - Expr visit(const Add *op) override { - return visit_binary_op(op); - } - Expr visit(const Sub *op) override { - return visit_binary_op(op); - } - Expr visit(const Mul *op) override { - return visit_binary_op(op); - } - Expr visit(const Div *op) override { - return visit_binary_op(op); - } - Expr visit(const Mod *op) override { - return visit_binary_op(op); - } - Expr visit(const Min *op) override { - return visit_binary_op(op); - } - Expr visit(const Max *op) override { - return visit_binary_op(op); - } - Expr visit(const EQ *op) override { - return visit_binary_op(op); - } - Expr visit(const NE *op) 
override { - return visit_binary_op(op); - } - Expr visit(const LT *op) override { - return visit_binary_op(op); - } - Expr visit(const LE *op) override { - return visit_binary_op(op); - } - Expr visit(const GT *op) override { - return visit_binary_op(op); - } - Expr visit(const GE *op) override { - return visit_binary_op(op); - } - Expr visit(const And *op) override { - return visit_binary_op(op); - } - Expr visit(const Or *op) override { - return visit_binary_op(op); - } - - Expr visit(const Select *op) override { - Expr mutated_condition = mutate(op->condition); - Expr mutated_true_value = mutate(op->true_value); - Expr mutated_false_value = mutate(op->false_value); - - bool t_float = mutated_true_value.type().is_float(); - bool f_float = mutated_false_value.type().is_float(); - - // If either argument is a float, then make sure both are float - if (t_float || f_float) { - if (!t_float) { - mutated_true_value = Cast::make(float_type(op->true_value), mutated_true_value); - } - if (!f_float) { - mutated_false_value = Cast::make(float_type(op->false_value), mutated_false_value); - } - } - - return Select::make(mutated_condition, mutated_true_value, mutated_false_value); - } - - Expr visit(const Ramp *op) override { - Expr mutated_base = mutate(op->base); - Expr mutated_stride = mutate(op->stride); - - // If either base or stride is a float, then make sure both are float - bool base_float = mutated_base.type().is_float(); - bool stride_float = mutated_stride.type().is_float(); - if (!base_float && stride_float) { - mutated_base = Cast::make(float_type(op->base), mutated_base); - } else if (base_float && !stride_float) { - mutated_stride = Cast::make(float_type(op->stride), mutated_stride); - } - - if (mutated_base.same_as(op->base) && mutated_stride.same_as(op->stride)) { - return op; - } else { - return Ramp::make(mutated_base, mutated_stride, op->lanes); - } - } - - Expr visit(const Let *op) override { - Expr mutated_value = mutate(op->value); - - bool changed = 
op->value.type().is_float() != mutated_value.type().is_float(); - if (changed) { - scope.push(op->name, mutated_value); - } - - Expr mutated_body = mutate(op->body); - - if (changed) { - scope.pop(op->name); - } - - return Let::make(op->name, mutated_value, mutated_body); - } - Stmt visit(const LetStmt *op) override { - - Expr mutated_value = mutate(op->value); - - bool changed = op->value.type().is_float() != mutated_value.type().is_float(); - if (changed) { - scope.push(op->name, mutated_value); - } - - Stmt mutated_body = mutate(op->body); - - if (changed) { - scope.pop(op->name); - } - - return LetStmt::make(op->name, mutated_value, mutated_body); - } - -public: - CastVariablesToFloatAndOffset(const std::vector &names_) - : names(names_) { - } - - const std::vector &names; - Scope scope; -}; - -// This is the base class for a special mutator that, by default, turns an IR -// tree into a tree of Stmts. Derived classes overload visit methods to filter -// out specific expressions which are placed in Evaluate nodes within the new -// tree. This functionality is used by GLSL varying attributes to transform -// tagged linear expressions into Store nodes for the vertex buffer. The -// IRFilter allows these expressions to be filtered out while maintaining the -// existing structure of Let variable scopes around them. -// -// TODO: could this be made to use the IRMutator pattern instead? 
-class IRFilter : public IRVisitor { -public: - virtual Stmt mutate(const Expr &e); - virtual Stmt mutate(const Stmt &s); - -protected: - using IRVisitor::visit; - - Stmt stmt; - - void visit(const IntImm *) override; - void visit(const FloatImm *) override; - void visit(const StringImm *) override; - void visit(const Cast *) override; - void visit(const Variable *) override; - void visit(const Add *) override; - void visit(const Sub *) override; - void visit(const Mul *) override; - void visit(const Div *) override; - void visit(const Mod *) override; - void visit(const Min *) override; - void visit(const Max *) override; - void visit(const EQ *) override; - void visit(const NE *) override; - void visit(const LT *) override; - void visit(const LE *) override; - void visit(const GT *) override; - void visit(const GE *) override; - void visit(const And *) override; - void visit(const Or *) override; - void visit(const Not *) override; - void visit(const Select *) override; - void visit(const Load *) override; - void visit(const Ramp *) override; - void visit(const Broadcast *) override; - void visit(const Call *) override; - void visit(const Let *) override; - void visit(const LetStmt *) override; - void visit(const AssertStmt *) override; - void visit(const ProducerConsumer *) override; - void visit(const For *) override; - void visit(const Store *) override; - void visit(const Provide *) override; - void visit(const Allocate *) override; - void visit(const Free *) override; - void visit(const Realize *) override; - void visit(const Block *) override; - void visit(const IfThenElse *) override; - void visit(const Evaluate *) override; -}; - -Stmt IRFilter::mutate(const Expr &e) { - if (e.defined()) { - e.accept(this); - } else { - stmt = Stmt(); - } - return stmt; -} - -Stmt IRFilter::mutate(const Stmt &s) { - if (s.defined()) { - s.accept(this); - } else { - stmt = Stmt(); - } - return stmt; -} - -template -void mutate_operator(IRFilter *mutator, const T *op, const 
A op_a, Stmt *stmt) { - Stmt a = mutator->mutate(op_a); - *stmt = a; -} -template -void mutate_operator(IRFilter *mutator, const T *op, const A op_a, const B op_b, Stmt *stmt) { - Stmt a = mutator->mutate(op_a); - Stmt b = mutator->mutate(op_b); - *stmt = make_block(a, b); -} -template -void mutate_operator(IRFilter *mutator, const T *op, const A op_a, const B op_b, const C op_c, Stmt *stmt) { - Stmt a = mutator->mutate(op_a); - Stmt b = mutator->mutate(op_b); - Stmt c = mutator->mutate(op_c); - *stmt = make_block(make_block(a, b), c); -} - -void IRFilter::visit(const IntImm *op) { - stmt = Stmt(); -} -void IRFilter::visit(const FloatImm *op) { - stmt = Stmt(); -} -void IRFilter::visit(const StringImm *op) { - stmt = Stmt(); -} -void IRFilter::visit(const Variable *op) { - stmt = Stmt(); -} - -void IRFilter::visit(const Cast *op) { - mutate_operator(this, op, op->value, &stmt); -} - -void IRFilter::visit(const Add *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} -void IRFilter::visit(const Sub *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} -void IRFilter::visit(const Mul *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} -void IRFilter::visit(const Div *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} -void IRFilter::visit(const Mod *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} -void IRFilter::visit(const Min *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} -void IRFilter::visit(const Max *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} -void IRFilter::visit(const EQ *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} -void IRFilter::visit(const NE *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} -void IRFilter::visit(const LT *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} -void IRFilter::visit(const LE *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} -void IRFilter::visit(const GT *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} 
-void IRFilter::visit(const GE *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} -void IRFilter::visit(const And *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} -void IRFilter::visit(const Or *op) { - mutate_operator(this, op, op->a, op->b, &stmt); -} - -void IRFilter::visit(const Not *op) { - mutate_operator(this, op, op->a, &stmt); -} - -void IRFilter::visit(const Select *op) { - mutate_operator(this, op, op->condition, op->true_value, op->false_value, &stmt); -} - -void IRFilter::visit(const Load *op) { - mutate_operator(this, op, op->predicate, op->index, &stmt); -} - -void IRFilter::visit(const Ramp *op) { - mutate_operator(this, op, op->base, op->stride, &stmt); -} - -void IRFilter::visit(const Broadcast *op) { - mutate_operator(this, op, op->value, &stmt); -} - -void IRFilter::visit(const Call *op) { - std::vector new_args(op->args.size()); - - // Mutate the args - for (size_t i = 0; i < op->args.size(); i++) { - Expr old_arg = op->args[i]; - Stmt new_arg = mutate(old_arg); - new_args[i] = new_arg; - } - - stmt = Stmt(); - for (size_t i = 0; i < new_args.size(); ++i) { - if (new_args[i].defined()) { - stmt = make_block(new_args[i], stmt); - } - } -} - -void IRFilter::visit(const Let *op) { - mutate_operator(this, op, op->value, op->body, &stmt); -} - -void IRFilter::visit(const LetStmt *op) { - mutate_operator(this, op, op->value, op->body, &stmt); -} - -void IRFilter::visit(const AssertStmt *op) { - mutate_operator(this, op, op->condition, op->message, &stmt); -} - -void IRFilter::visit(const ProducerConsumer *op) { - mutate_operator(this, op, op->body, &stmt); -} - -void IRFilter::visit(const For *op) { - mutate_operator(this, op, op->min, op->extent, op->body, &stmt); -} - -void IRFilter::visit(const Store *op) { - mutate_operator(this, op, op->predicate, op->value, op->index, &stmt); -} - -void IRFilter::visit(const Provide *op) { - stmt = Stmt(); - for (size_t i = 0; i < op->args.size(); i++) { - Stmt new_arg = mutate(op->args[i]); 
- if (new_arg.defined()) { - stmt = make_block(new_arg, stmt); - } - Stmt new_value = mutate(op->values[i]); - if (new_value.defined()) { - stmt = make_block(new_value, stmt); - } - } -} - -void IRFilter::visit(const Allocate *op) { - stmt = Stmt(); - for (size_t i = 0; i < op->extents.size(); i++) { - Stmt new_extent = mutate(op->extents[i]); - if (new_extent.defined()) { - stmt = make_block(new_extent, stmt); - } - } - - Stmt body = mutate(op->body); - if (body.defined()) { - stmt = make_block(body, stmt); - } - - Stmt condition = mutate(op->condition); - if (condition.defined()) { - stmt = make_block(condition, stmt); - } -} - -void IRFilter::visit(const Free *op) { -} - -void IRFilter::visit(const Realize *op) { - stmt = Stmt(); - - // Mutate the bounds - for (size_t i = 0; i < op->bounds.size(); i++) { - Expr old_min = op->bounds[i].min; - Expr old_extent = op->bounds[i].extent; - Stmt new_min = mutate(old_min); - Stmt new_extent = mutate(old_extent); - - if (new_min.defined()) { - stmt = make_block(new_min, stmt); - } - if (new_extent.defined()) { - stmt = make_block(new_extent, stmt); - } - } - - Stmt body = mutate(op->body); - if (body.defined()) { - stmt = make_block(body, stmt); - } - - Stmt condition = mutate(op->condition); - if (condition.defined()) { - stmt = make_block(condition, stmt); - } -} - -void IRFilter::visit(const Block *op) { - mutate_operator(this, op, op->first, op->rest, &stmt); -} - -void IRFilter::visit(const IfThenElse *op) { - mutate_operator(this, op, op->condition, op->then_case, op->else_case, &stmt); -} - -void IRFilter::visit(const Evaluate *op) { - mutate_operator(this, op, op->value, &stmt); -} - -// This visitor takes a IR tree containing a set of .glsl scheduled for-loops -// and creates a matching set of serial for-loops to setup a vertex buffer on -// the host. 
The visitor filters out glsl_varying intrinsics and transforms -// them into Store nodes to evaluate the linear expressions they tag within the -// scope of all of the Let definitions they fall within. -// The statement returned by this operation should be executed on the host -// before the call to halide_dev_run. -class CreateVertexBufferOnHost : public IRFilter { -public: - using IRFilter::visit; - - void visit(const Call *op) override { - - // Transform glsl_varying intrinsics into store operations to output the - // vertex coordinate values. - if (op->is_intrinsic(Call::glsl_varying)) { - - // Construct an expression for the offset of the coordinate value in - // terms of the current integer loop variables and the varying - // attribute channel number - std::string attribute_name = op->args[0].as()->value; - - Expr offset_expression = Variable::make(Int(32), "gpu.vertex_offset") + - attribute_order[attribute_name]; - - stmt = Store::make(vertex_buffer_name, op->args[1], offset_expression, - Parameter(), const_true(op->args[1].type().lanes()), ModulusRemainder()); - } else { - IRFilter::visit(op); - } - } - - void visit(const Let *op) override { - stmt = nullptr; - - Stmt mutated_value = mutate(op->value); - Stmt mutated_body = mutate(op->body); - - // If an operation was filtered out of the body, also filter out the - // whole let expression so that the body may be evaluated completely. In - // the case that the let variable is not used in the mutated body, it - // will be removed by simplification. - if (mutated_body.defined()) { - stmt = LetStmt::make(op->name, op->value, mutated_body); - } - - // If an operation with a side effect was filtered out of the value, the - // stmt'ified value is placed in a Block, so that the side effect will - // be included in filtered IR tree. 
- if (mutated_value.defined()) { - stmt = make_block(mutated_value, stmt); - } - } - - void visit(const LetStmt *op) override { - stmt = Stmt(); - - Stmt mutated_value = mutate(op->value); - Stmt mutated_body = mutate(op->body); - - if (mutated_body.defined()) { - stmt = LetStmt::make(op->name, op->value, mutated_body); - } - - if (mutated_value.defined()) { - stmt = make_block(mutated_value, stmt); - } - } - - void visit(const For *op) override { - if (CodeGen_GPU_Dev::is_gpu_var(op->name) && op->device_api == DeviceAPI::GLSL) { - // Create a for-loop of integers iterating over the coordinates in - // this dimension - - std::string name = op->name + ".idx"; - const std::vector &dim = dims[op->name]; - - internal_assert(for_loops.size() <= 1); - for_loops.push_back(op); - - Expr loop_variable = Variable::make(Int(32), name); - loop_variables.push_back(loop_variable); - - // TODO: When support for piecewise linear expressions is added this - // expression must support more than two coordinates in each - // dimension. - Expr coord_expr = select(loop_variable == 0, dim[0], dim[1]); - - // Visit the body of the for-loop - Stmt mutated_body = mutate(op->body); - - // If this was the inner most for-loop of the .glsl scheduled pair, - // add a let definition for the vertex index and Store the spatial - // coordinates - const For *nested_for = op->body.as(); - if (!(nested_for && CodeGen_GPU_Dev::is_gpu_var(nested_for->name))) { - - // Create a variable to store the offset in floats of this - // vertex - Expr gpu_varying_offset = Variable::make(Int(32), "gpu.vertex_offset"); - - // Add expressions for the x and y vertex coordinates. 
- Expr coord1 = cast(Variable::make(Int(32), for_loops[0]->name)); - Expr coord0 = cast(Variable::make(Int(32), for_loops[1]->name)); - - // Transform the vertex coordinates to GPU device coordinates on - // [-1,1] - coord1 = (coord1 / for_loops[0]->extent) * 2.0f - 1.0f; - coord0 = (coord0 / for_loops[1]->extent) * 2.0f - 1.0f; - - // Remove varying attribute intrinsics from the vertex setup IR - // tree. - mutated_body = remove_varying_attributes(mutated_body); - - // The GPU will take texture coordinates at pixel centers during - // interpolation, we offset the Halide integer grid by 0.5 so that - // these coordinates line up on integer coordinate values. - std::vector names = {for_loops[0]->name, for_loops[1]->name}; - CastVariablesToFloatAndOffset cast_and_offset(names); - mutated_body = cast_and_offset.mutate(mutated_body); - - // Store the coordinates into the vertex buffer in interleaved - // order - mutated_body = make_block(Store::make(vertex_buffer_name, - coord1, - gpu_varying_offset + 1, - Parameter(), const_true(), - ModulusRemainder()), - mutated_body); - - mutated_body = make_block(Store::make(vertex_buffer_name, - coord0, - gpu_varying_offset + 0, - Parameter(), const_true(), - ModulusRemainder()), - mutated_body); - - // TODO: The value 2 in this expression must be changed to reflect - // addition coordinate values in the fastest changing dimension when - // support for piecewise linear functions is added - Expr offset_expression = (loop_variables[0] * num_padded_attributes * 2) + - (loop_variables[1] * num_padded_attributes); - mutated_body = LetStmt::make("gpu.vertex_offset", - offset_expression, mutated_body); - } - - // Add a let statement for the for-loop name variable - Stmt loop_var = LetStmt::make(op->name, coord_expr, mutated_body); - - stmt = For::make(name, 0, (int)dim.size(), ForType::Serial, DeviceAPI::None, loop_var); - - } else { - IRFilter::visit(op); - } - } - - // The name of the previously allocated vertex buffer to store values 
- std::string vertex_buffer_name; - - // Expressions for the spatial values of each coordinate in the GPU scheduled - // loop dimensions. - typedef std::map> DimsType; - DimsType dims; - - // The channel of each varying attribute in the interleaved vertex buffer - std::map attribute_order; - - // The number of attributes padded up to the next multiple of four. This is - // the stride from one vertex to the next in the buffer - int num_padded_attributes; - - // Independent variable names in the linear expressions - std::vector for_loops; - - // Loop variables iterated across per GPU scheduled loop dimension to - // construct the vertex buffer - std::vector loop_variables; -}; - -// These two methods provide a workaround to maintain unused let statements in -// the IR tree util calls are added that used them in codegen. - -// TODO: We want to define a set of variables during lowering, and then use -// them during GLSL host codegen to pass values to the -// halide_dev_run function. It turns out that these variables will -// be simplified away since the call to the function does not appear -// in the IR. To avoid this we wrap the declaration in a -// return_second intrinsic as well as add a return_second intrinsic -// to consume the value. -// This prevents simplification passes that occur before codegen -// from removing the variables or substituting in their constant -// values. - -Expr dont_simplify(const Expr &v_) { - return Internal::Call::make(v_.type(), - Internal::Call::return_second, - {0, v_}, - Internal::Call::Intrinsic); -} - -Stmt used_in_codegen(Type type_, const std::string &v_) { - return Evaluate::make(Internal::Call::make(Int(32), - Internal::Call::return_second, - {Variable::make(type_, v_), 0}, - Internal::Call::Intrinsic)); -} - -// This mutator inserts a set of serial for-loops to create the vertex buffer -// on the host using CreateVertexBufferOnHost above. 
-class CreateVertexBufferHostLoops : public IRMutator { -public: - using IRMutator::visit; - - Stmt visit(const For *op) override { - if (CodeGen_GPU_Dev::is_gpu_var(op->name) && op->device_api == DeviceAPI::GLSL) { - - const For *loop1 = op; - const For *loop0 = loop1->body.as(); - - internal_assert(loop1->body.as()) << "Did not find pair of nested For loops"; - - // Construct a mesh of expressions to instantiate during runtime - std::map varyings; - - FindVaryingAttributeTags tag_finder(varyings); - op->accept(&tag_finder); - - // Establish and order for the attributes in each vertex - std::map attribute_order; - - // Add the attribute names to the mesh in the order that they appear in - // each vertex - attribute_order["__vertex_x"] = 0; - attribute_order["__vertex_y"] = 1; - - int idx = 2; - for (const std::pair &v : varyings) { - attribute_order[v.first] = idx++; - } - - // Construct a list of expressions giving to coordinate locations along - // each dimension, starting with the minimum and maximum coordinates - - attribute_order[loop0->name] = 0; - attribute_order[loop1->name] = 1; - - Expr loop0_max = Add::make(loop0->min, loop0->extent); - Expr loop1_max = Add::make(loop1->min, loop1->extent); - - std::vector> coords(2); - - coords[0].push_back(loop0->min); - coords[0].push_back(loop0_max); - - coords[1].push_back(loop1->min); - coords[1].push_back(loop1_max); - - // Count the two spatial x and y coordinates plus the number of - // varying attribute expressions found - int num_attributes = varyings.size() + 2; - - // Pad the number of attributes up to a multiple of four - int num_padded_attributes = (num_attributes + 0x3) & ~0x3; - int vertex_buffer_size = num_padded_attributes * coords[0].size() * coords[1].size(); - - // Filter out varying attribute expressions from the glsl scheduled - // loops. 
The expressions are filtered out in situ, among the - // variables in scope - CreateVertexBufferOnHost vs; - vs.vertex_buffer_name = "glsl.vertex_buffer"; - vs.num_padded_attributes = num_padded_attributes; - vs.dims[loop0->name] = coords[0]; - vs.dims[loop1->name] = coords[1]; - vs.attribute_order = attribute_order; - - Stmt vertex_setup = vs.mutate(loop1); - - // Remove varying attribute intrinsics from the vertex setup IR - // tree. These may occur if an expression such as a Let-value was - // filtered out without being mutated. - vertex_setup = remove_varying_attributes(vertex_setup); - - // Simplify the new host code. Workaround for #588 - vertex_setup = simplify(vertex_setup); - vertex_setup = simplify(vertex_setup); - vertex_setup = simplify(vertex_setup); - vertex_setup = simplify(vertex_setup); - - // Replace varying attribute intriniscs in the gpu scheduled loops - // with variables with ".varying" tagged names - Stmt loop_stmt = replace_varying_attributes(op); - - // Simplify - loop_stmt = simplify(loop_stmt, true); - - // It is possible that linear expressions we tagged in higher-level - // intrinsics were removed by simplification if they were only used in - // subsequent tagged linear expressions. Run a pass to check for - // these and remove them from the varying attribute list - prune_varying_attributes(loop_stmt, varyings); - - // At this point the varying attribute expressions have been removed from - // loop_stmt- it only contains variables tagged with .varying - - // The GPU will only interpolate floating point values so the varying - // attribute variables must be converted to floating point. If the - // original varying expression was integer, casts are inserts to - // snap the value back to the integer grid. 
- loop_stmt = CastVaryingVariables().mutate(loop_stmt); - - // clang-format off - // Insert two new for-loops for vertex buffer generation on the host - // before the two GPU scheduled for-loops - return LetStmt::make("glsl.num_coords_dim0", dont_simplify((int)(coords[0].size())), - LetStmt::make("glsl.num_coords_dim1", dont_simplify((int)(coords[1].size())), - LetStmt::make("glsl.num_padded_attributes", dont_simplify(num_padded_attributes), - Allocate::make(vs.vertex_buffer_name, Float(32), MemoryType::Auto, {vertex_buffer_size}, const_true(), - Block::make(vertex_setup, - Block::make(loop_stmt, - Block::make(used_in_codegen(Int(32), "glsl.num_coords_dim0"), - Block::make(used_in_codegen(Int(32), "glsl.num_coords_dim1"), - Block::make(used_in_codegen(Int(32), "glsl.num_padded_attributes"), - Free::make(vs.vertex_buffer_name)))))))))); - // clang-format on - } else { - return IRMutator::visit(op); - } - } -}; - -} // namespace - -Stmt setup_gpu_vertex_buffer(const Stmt &s) { - CreateVertexBufferHostLoops vb; - return vb.mutate(s); -} - -} // namespace Internal -} // namespace Halide diff --git a/src/VaryingAttributes.h b/src/VaryingAttributes.h deleted file mode 100644 index 55475471e1aa..000000000000 --- a/src/VaryingAttributes.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef __HALIDE_VARYING_ATTRIBUTES__H -#define __HALIDE_VARYING_ATTRIBUTES__H - -/** \file - * This file contains functions that detect expressions in a GLSL scheduled - * function that may be evaluated per vertex and interpolated across the domain - * instead of being evaluated at each pixel location across the image. - */ - -#include "Expr.h" - -namespace Halide { -namespace Internal { - -/** find_linear_expressions(Stmt s) identifies expressions that may be moved - * out of the generated fragment shader into a varying attribute. 
These - * expressions are tagged by wrapping them in a glsl_varying intrinsic - */ -Stmt find_linear_expressions(const Stmt &s); - -/** Compute a set of 2D mesh coordinates based on the behavior of varying - * attribute expressions contained within a GLSL scheduled for loop. This - * method is called during lowering to extract varying attribute - * expressions and generate code to evalue them at each mesh vertex - * location. The operation is performed on the host before the draw call - * to invoke the shader - */ -Stmt setup_gpu_vertex_buffer(const Stmt &s); - -} // namespace Internal -} // namespace Halide - -#endif diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 1385e04ae13a..25214f37e68f 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -47,7 +47,6 @@ set(RUNTIME_CPP msan msan_stubs opencl - opengl opengl_egl_context opengl_glx_context openglcompute @@ -132,7 +131,6 @@ set(RUNTIME_HEADER_FILES HalideRuntimeHexagonHost.h HalideRuntimeMetal.h HalideRuntimeOpenCL.h - HalideRuntimeOpenGL.h HalideRuntimeOpenGLCompute.h HalideRuntimeQurt.h ) diff --git a/src/runtime/HalideRuntime.h b/src/runtime/HalideRuntime.h index e4b7dc24fd54..3a88b91a0611 100644 --- a/src/runtime/HalideRuntime.h +++ b/src/runtime/HalideRuntime.h @@ -1283,7 +1283,6 @@ typedef enum halide_target_feature_t { halide_target_feature_cl_doubles, ///< Enable double support on OpenCL targets halide_target_feature_cl_atomic64, ///< Enable 64-bit atomics operations on OpenCL targets - halide_target_feature_opengl, ///< Enable the OpenGL runtime. NOTE: this feature is deprecated and will be removed in Halide 12. halide_target_feature_openglcompute, ///< Enable OpenGL Compute runtime. 
halide_target_feature_user_context, ///< Generated code takes a user_context pointer as first argument diff --git a/src/runtime/HalideRuntimeOpenGL.h b/src/runtime/HalideRuntimeOpenGL.h deleted file mode 100644 index 14bb8b57f945..000000000000 --- a/src/runtime/HalideRuntimeOpenGL.h +++ /dev/null @@ -1,105 +0,0 @@ -#ifndef HALIDE_HALIDERUNTIMEOPENGL_H -#define HALIDE_HALIDERUNTIMEOPENGL_H - -// Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one -#ifndef HALIDE_HALIDERUNTIME_H - -#include "HalideRuntime.h" - -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/** \file - * Routines specific to the Halide OpenGL runtime. - */ - -#define HALIDE_RUNTIME_OPENGL - -HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12") -extern const struct halide_device_interface_t *halide_opengl_device_interface(); - -/** These are forward declared here to allow clients to override the - * Halide Glsl runtime. Do not call them. */ -// @{ -HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12") -extern int halide_opengl_initialize_kernels(void *user_context, void **state_ptr, - const char *src, int size); - -HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12") -extern int halide_opengl_run(void *user_context, - void *state_ptr, - const char *entry_name, - int blocksX, int blocksY, int blocksZ, - int threadsX, int threadsY, int threadsZ, - int shared_mem_bytes, - size_t arg_sizes[], - void *args[], - int8_t is_buffer[], - int num_attributes, - float *vertex_buffer, - int num_coords_dim0, - int num_coords_dim1); -// @} - -/** Set the underlying OpenGL texture for a buffer. The texture must - * have an extent large enough to cover that specified by the - * halide_buffer_t extent fields. The dev field of the halide_buffer_t - * must be NULL when this routine is called. 
This call can fail due to - * being passed an invalid texture. The device and host dirty bits are - * left unmodified. */ -HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12") -extern int halide_opengl_wrap_texture(void *user_context, struct halide_buffer_t *buf, uint64_t texture_id); - -/** Set the underlying OpenGL texture for a buffer to refer to the - * current render target (e.g., the frame buffer or an FBO). The - * render target must have an extent large enough to cover that - * specified by the halide_buffer_t extent fields. The dev field of - * the halide_buffer_t must be NULL when this routine is called. This - * call can fail due to running out of memory. The device and host - * dirty bits are left unmodified. */ -HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12") -extern int halide_opengl_wrap_render_target(void *user_context, struct halide_buffer_t *buf); - -/** Disconnect this halide_buffer_t from the texture it was previously - * wrapped around. Should only be called for a halide_buffer_t that - * halide_opengl_wrap_texture was previously called on. Frees any - * storage associated with the binding of the halide_buffer_t and the - * device pointer, but does not free the texture. The dev field of - * the halide_buffer_t will be NULL on return. - */ -HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12") -extern int halide_opengl_detach_texture(void *user_context, struct halide_buffer_t *buf); - -/** Return the underlying texture for a halide_buffer_t. This buffer - * must be valid on an OpenGL device, or not have any associated - * device memory. If there is no device memory (dev field is NULL), - * or if the buffer was wrapped via - * halide_opengl_wrap_render_target(), this returns 0. 
- */ -HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12") -extern uintptr_t halide_opengl_get_texture(void *user_context, struct halide_buffer_t *buf); - -/** Forget all state associated with the previous OpenGL context. This is - * similar to halide_opengl_release, except that we assume that all OpenGL - * resources have already been reclaimed by the OS. */ -HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12") -extern void halide_opengl_context_lost(void *user_context); - -/** This functions MUST be provided by the host environment to retrieve pointers - * to OpenGL API functions. */ -HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12") -void *halide_opengl_get_proc_address(void *user_context, const char *name); - -/** This functions MUST be provided by the host environment to create an OpenGL - * context for use by the OpenGL backend. */ -HALIDE_ATTRIBUTE_DEPRECATED("OpenGL is deprecated in Halide 11 and will be removed in Halide 12") -int halide_opengl_create_context(void *user_context); - -#ifdef __cplusplus -} // End extern "C" -#endif - -#endif // HALIDE_HALIDERUNTIMEOPENGL_H diff --git a/src/runtime/opengl.cpp b/src/runtime/opengl.cpp deleted file mode 100644 index 73964bfb64ee..000000000000 --- a/src/runtime/opengl.cpp +++ /dev/null @@ -1,2101 +0,0 @@ -// Ignore deprecation warnings inside our own runtime -#define HALIDE_ALLOW_DEPRECATED 1 - -#include "HalideRuntimeOpenGL.h" -#include "device_interface.h" -#include "mini_opengl.h" -#include "printer.h" - -// This constant is used to indicate that the application will take -// responsibility for binding the output render target before calling the -// Halide function. 
-#define HALIDE_OPENGL_RENDER_TARGET ((uint64_t)-1) - -// Implementation note: all function that directly or indirectly access the -// runtime state in halide_opengl_state must be declared as WEAK, otherwise -// the behavior at runtime is undefined. - -// List of all OpenGL functions used by the runtime. The list is used to -// declare and initialize the dispatch table in OpenGLState below. -#define USED_GL_FUNCTIONS \ - GLFUNC(PFNGLDELETETEXTURESPROC, DeleteTextures); \ - GLFUNC(PFNGLGENTEXTURESPROC, GenTextures); \ - GLFUNC(PFNGLBINDTEXTUREPROC, BindTexture); \ - GLFUNC(PFNGLGETERRORPROC, GetError); \ - GLFUNC(PFNGLVIEWPORTPROC, Viewport); \ - GLFUNC(PFNGLGENBUFFERSPROC, GenBuffers); \ - GLFUNC(PFNGLDELETEBUFFERSPROC, DeleteBuffers); \ - GLFUNC(PFNGLBINDBUFFERPROC, BindBuffer); \ - GLFUNC(PFNGLBUFFERDATAPROC, BufferData); \ - GLFUNC(PFNGLTEXPARAMETERIPROC, TexParameteri); \ - GLFUNC(PFNGLTEXIMAGE2DPROC, TexImage2D); \ - GLFUNC(PFNGLTEXSUBIMAGE2DPROC, TexSubImage2D); \ - GLFUNC(PFNGLDISABLEPROC, Disable); \ - GLFUNC(PFNGLDISABLEPROC, Enable); \ - GLFUNC(PFNGLCREATESHADERPROC, CreateShader); \ - GLFUNC(PFNGLACTIVETEXTUREPROC, ActiveTexture); \ - GLFUNC(PFNGLSHADERSOURCEPROC, ShaderSource); \ - GLFUNC(PFNGLCOMPILESHADERPROC, CompileShader); \ - GLFUNC(PFNGLGETSHADERIVPROC, GetShaderiv); \ - GLFUNC(PFNGLGETSHADERINFOLOGPROC, GetShaderInfoLog); \ - GLFUNC(PFNGLDELETESHADERPROC, DeleteShader); \ - GLFUNC(PFNGLCREATEPROGRAMPROC, CreateProgram); \ - GLFUNC(PFNGLATTACHSHADERPROC, AttachShader); \ - GLFUNC(PFNGLLINKPROGRAMPROC, LinkProgram); \ - GLFUNC(PFNGLGETPROGRAMIVPROC, GetProgramiv); \ - GLFUNC(PFNGLGETPROGRAMINFOLOGPROC, GetProgramInfoLog); \ - GLFUNC(PFNGLUSEPROGRAMPROC, UseProgram); \ - GLFUNC(PFNGLDELETEPROGRAMPROC, DeleteProgram); \ - GLFUNC(PFNGLGETUNIFORMLOCATIONPROC, GetUniformLocation); \ - GLFUNC(PFNGLUNIFORM1IVPROC, Uniform1iv); \ - GLFUNC(PFNGLUNIFORM2IVPROC, Uniform2iv); \ - GLFUNC(PFNGLUNIFORM2IVPROC, Uniform4iv); \ - GLFUNC(PFNGLUNIFORM1FVPROC, 
Uniform1fv); \ - GLFUNC(PFNGLUNIFORM1FVPROC, Uniform4fv); \ - GLFUNC(PFNGLGENFRAMEBUFFERSPROC, GenFramebuffers); \ - GLFUNC(PFNGLDELETEFRAMEBUFFERSPROC, DeleteFramebuffers); \ - GLFUNC(PFNGLCHECKFRAMEBUFFERSTATUSPROC, CheckFramebufferStatus); \ - GLFUNC(PFNGLBINDFRAMEBUFFERPROC, BindFramebuffer); \ - GLFUNC(PFNGLFRAMEBUFFERTEXTURE2DPROC, FramebufferTexture2D); \ - GLFUNC(PFNGLGETATTRIBLOCATIONPROC, GetAttribLocation); \ - GLFUNC(PFNGLVERTEXATTRIBPOINTERPROC, VertexAttribPointer); \ - GLFUNC(PFNGLDRAWELEMENTSPROC, DrawElements); \ - GLFUNC(PFNGLENABLEVERTEXATTRIBARRAYPROC, EnableVertexAttribArray); \ - GLFUNC(PFNGLDISABLEVERTEXATTRIBARRAYPROC, DisableVertexAttribArray); \ - GLFUNC(PFNGLGETVERTEXATTRIBIVPROC, GetVertexAttribiv); \ - GLFUNC(PFNGLPIXELSTOREIPROC, PixelStorei); \ - GLFUNC(PFNGLREADPIXELS, ReadPixels); \ - GLFUNC(PFNGLGETSTRINGPROC, GetString); \ - GLFUNC(PFNGLGETINTEGERV, GetIntegerv); \ - GLFUNC(PFNGLGETBOOLEANV, GetBooleanv); \ - GLFUNC(PFNGLFINISHPROC, Finish); - -// List of all OpenGL functions used by the runtime, which may not -// exist due to an older or less capable version of GL. In using any -// of these functions, code must test if they are nullptr. 
-#define OPTIONAL_GL_FUNCTIONS \ - GLFUNC(PFNGLGENVERTEXARRAYS, GenVertexArrays); \ - GLFUNC(PFNGLBINDVERTEXARRAY, BindVertexArray); \ - GLFUNC(PFNGLDELETEVERTEXARRAYS, DeleteVertexArrays); \ - GLFUNC(PFNDRAWBUFFERS, DrawBuffers) - -// ---------- Types ---------- - -using namespace Halide::Runtime::Internal; - -namespace Halide { -namespace Runtime { -namespace Internal { -namespace OpenGL { - -extern WEAK halide_device_interface_t opengl_device_interface; - -WEAK const char *gl_error_name(int32_t err) { - const char *result; - switch (err) { - case 0x500: - result = "GL_INVALID_ENUM"; - break; - case 0x501: - result = "GL_INVALID_VALUE"; - break; - case 0x502: - result = "GL_INVALID_OPERATION"; - break; - case 0x503: - result = "GL_STACK_OVERFLOW"; - break; - case 0x504: - result = "GL_STACK_UNDERFLOW"; - break; - case 0x505: - result = "GL_OUT_OF_MEMORY"; - break; - case 0x506: - result = "GL_INVALID_FRAMEBUFFER_OPERATION"; - break; - case 0x507: - result = "GL_CONTEXT_LOST"; - break; - case 0x8031: - result = "GL_TABLE_TOO_LARGE"; - break; - default: - result = ""; - break; - } - return result; -} - -struct HalideMalloc { - ALWAYS_INLINE HalideMalloc(void *user_context, size_t size) - : user_context(user_context), ptr(halide_malloc(user_context, size)) { - } - ALWAYS_INLINE ~HalideMalloc() { - halide_free(user_context, ptr); - } - void *const user_context; - void *const ptr; -}; - -enum OpenGLProfile { - OpenGL, - OpenGLES -}; - -struct Argument { - // The kind of data stored in an argument - enum Kind { - Invalid, - Uniform, // uniform variable - Varying, // varying attribute - Inbuf, // input texture - Outbuf // output texture - }; - - // The elementary data type of the argument - enum Type { - Void, - Bool, - Float, - Int8, - Int16, - Int32, - UInt8, - UInt16, - UInt32 - }; - - char *name; - Kind kind; - Type type; - Argument *next; -}; - -struct KernelInfo { - char *name; - char *source; - Argument *arguments; - GLuint shader_id; - GLuint program_id; -}; - 
-struct ModuleState { - KernelInfo *kernel; - ModuleState *next; -}; - -// All persistent state maintained by the runtime. -struct GlobalState { - void init(); - bool CheckAndReportError(void *user_context, const char *location); - - bool initialized; - - // Information about the OpenGL platform we're running on. - OpenGLProfile profile; - int major_version, minor_version; - bool have_vertex_array_objects; - bool have_texture_rg; - bool have_texture_float; - bool have_texture_rgb8_rgba8; - - // Various objects shared by all filter kernels - GLuint framebuffer_id; - GLuint vertex_array_object; - GLuint vertex_buffer; - GLuint element_buffer; - - // Declare pointers used OpenGL functions -#define GLFUNC(PTYPE, VAR) PTYPE VAR - USED_GL_FUNCTIONS; - OPTIONAL_GL_FUNCTIONS; -#undef GLFUNC -}; - -WEAK bool GlobalState::CheckAndReportError(void *user_context, const char *location) { - GLenum err = GetError(); - if (err != GL_NO_ERROR) { - error(user_context) << "OpenGL error " << gl_error_name(err) << "(" << (int)err << ")" - << " at " << location << ".\n"; - return true; - } - return false; -} - -WEAK GlobalState global_state; - -// Saves & restores OpenGL state -class GLStateSaver { -public: - ALWAYS_INLINE GLStateSaver() { - save(); - } - ALWAYS_INLINE ~GLStateSaver() { - restore(); - } - -private: - // The state variables - GLint active_texture; - GLint array_buffer_binding; - GLint element_array_buffer_binding; - GLint framebuffer_binding; - GLint program; - GLint vertex_array_binding; - GLint viewport[4]; - GLboolean cull_face; - GLboolean depth_test; - int max_combined_texture_image_units; - GLint *texture_2d_binding; - int max_vertex_attribs; - GLint *vertex_attrib_array_enabled; - - // Define these out-of-line as WEAK, to avoid LLVM error "MachO doesn't support COMDATs" - void save(); - void restore(); -}; - -WEAK void GLStateSaver::save() { - global_state.GetIntegerv(GL_ACTIVE_TEXTURE, &active_texture); - global_state.GetIntegerv(GL_ARRAY_BUFFER_BINDING, 
&array_buffer_binding); - global_state.GetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING, &element_array_buffer_binding); - global_state.GetIntegerv(GL_FRAMEBUFFER_BINDING, &framebuffer_binding); - global_state.GetIntegerv(GL_CURRENT_PROGRAM, &program); - global_state.GetBooleanv(GL_CULL_FACE, &cull_face); - global_state.GetBooleanv(GL_DEPTH_TEST, &depth_test); - global_state.GetIntegerv(GL_VIEWPORT, viewport); - - global_state.GetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &max_combined_texture_image_units); - texture_2d_binding = (GLint *)malloc(max_combined_texture_image_units * sizeof(GLint)); - for (int i = 0; i < max_combined_texture_image_units; i++) { - global_state.ActiveTexture(GL_TEXTURE0 + i); - global_state.GetIntegerv(GL_TEXTURE_BINDING_2D, &texture_2d_binding[i]); - } - - global_state.GetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attribs); - vertex_attrib_array_enabled = (GLint *)malloc(max_vertex_attribs * sizeof(GLint)); - for (int i = 0; i < max_vertex_attribs; i++) { - global_state.GetVertexAttribiv(i, GL_VERTEX_ATTRIB_ARRAY_ENABLED, &vertex_attrib_array_enabled[i]); - } - - if (global_state.have_vertex_array_objects) { - global_state.GetIntegerv(GL_VERTEX_ARRAY_BINDING, &vertex_array_binding); - } - -#ifdef DEBUG_RUNTIME - debug(nullptr) << "Saved OpenGL state\n"; -#endif -} - -WEAK void GLStateSaver::restore() { -#ifdef DEBUG_RUNTIME - debug(nullptr) << "Restoring OpenGL state\n"; -#endif - - for (int i = 0; i < max_combined_texture_image_units; i++) { - global_state.ActiveTexture(GL_TEXTURE0 + i); - global_state.BindTexture(GL_TEXTURE_2D, texture_2d_binding[i]); - } - free(texture_2d_binding); - - if (global_state.have_vertex_array_objects) { - global_state.BindVertexArray(vertex_array_binding); - } - - for (int i = 0; i < max_vertex_attribs; i++) { - if (vertex_attrib_array_enabled[i]) { - global_state.EnableVertexAttribArray(i); - } else { - global_state.DisableVertexAttribArray(i); - } - } - free(vertex_attrib_array_enabled); - - 
global_state.ActiveTexture(active_texture); - global_state.BindFramebuffer(GL_FRAMEBUFFER, framebuffer_binding); - global_state.BindBuffer(GL_ARRAY_BUFFER, array_buffer_binding); - global_state.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, element_array_buffer_binding); - global_state.UseProgram(program); - global_state.Viewport(viewport[0], viewport[1], viewport[2], viewport[3]); - (cull_face ? global_state.Enable : global_state.Disable)(GL_CULL_FACE); - (depth_test ? global_state.Enable : global_state.Disable)(GL_DEPTH_TEST); -} - -// A list of module-specific state. Each module corresponds to a single Halide filter -WEAK ModuleState *state_list; - -WEAK const char *kernel_marker = "/// KERNEL "; -WEAK const char *input_marker = "/// IN_BUFFER "; -WEAK const char *output_marker = "/// OUT_BUFFER "; -WEAK const char *uniform_marker = "/// UNIFORM "; -WEAK const char *varying_marker = "/// VARYING "; - -// ---------- Helper functions ---------- - -WEAK char *strndup(const char *s, size_t n) { - char *p = (char *)malloc(n + 1); - memcpy(p, s, n); - p[n] = '\0'; - return p; -} - -// Strip whitespace from the right side of -// a string -WEAK char *strstrip(char *str, size_t n) { - char *pos = str; - while (pos != str + n && *pos != '\0' && *pos != '\n' && *pos != ' ') { - pos++; - } - *pos = '\0'; - return str; -} - -WEAK void debug_buffer(void *user_context, halide_buffer_t *buf) { - debug(user_context) << *buf << "\n"; -} - -WEAK GLuint make_shader(void *user_context, GLenum type, - const char *source, GLint *length) { -#ifdef DEBUG_RUNTIME - { - debug(user_context) << ((type == GL_VERTEX_SHADER) ? "GL_VERTEX_SHADER" : "GL_FRAGMENT_SHADER") - << " SOURCE:\n"; - // debug() will go thru Printer<> which has a fixed, non-growing size. - // Just pass the source directly to halide_print instead, so it won't get clipped. 
- halide_print(user_context, source); - } -#endif - - GLuint shader = global_state.CreateShader(type); - if (global_state.CheckAndReportError(user_context, "make_shader(1)")) { - return 1; - } - if (*source == '\0') { - debug(user_context) << "Halide GLSL: passed shader source is empty, using default.\n"; - const char *default_shader = "varying vec2 pixcoord;\n void main() { }"; - global_state.ShaderSource(shader, 1, (const GLchar **)&default_shader, nullptr); - } else { - global_state.ShaderSource(shader, 1, (const GLchar **)&source, length); - } - if (global_state.CheckAndReportError(user_context, "make_shader(2)")) { - return 1; - } - global_state.CompileShader(shader); - if (global_state.CheckAndReportError(user_context, "make_shader(3)")) { - return 1; - } - - GLint shader_ok = 0; - global_state.GetShaderiv(shader, GL_COMPILE_STATUS, &shader_ok); - if (!shader_ok) { - print(user_context) << "Could not compile shader:\n"; - GLint log_len; - global_state.GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_len); - HalideMalloc log_tmp(user_context, log_len); - if (log_tmp.ptr) { - char *log = (char *)log_tmp.ptr; - global_state.GetShaderInfoLog(shader, log_len, nullptr, log); - print(user_context) << log << "\n"; - } - global_state.DeleteShader(shader); - return 0; - } - return shader; -} - -// Check whether string starts with a given prefix. -// Returns pointer to character after matched prefix if successful or nullptr. -WEAK const char *match_prefix(const char *s, const char *prefix) { - if (0 == strncmp(s, prefix, strlen(prefix))) { - return s + strlen(prefix); - } - return nullptr; -} - -// Parse declaration of the form "type name" and construct matching Argument. 
-WEAK Argument *parse_argument(void *user_context, const char *src, - const char *end) { - const char *name; - Argument::Type type = Argument::Void; - if ((name = match_prefix(src, "float "))) { - type = Argument::Float; - } else if ((name = match_prefix(src, "bool "))) { - type = Argument::Bool; - } else if ((name = match_prefix(src, "int8_t "))) { - type = Argument::Int8; - } else if ((name = match_prefix(src, "int16_t "))) { - type = Argument::Int16; - } else if ((name = match_prefix(src, "int32_t "))) { - type = Argument::Int32; - } else if ((name = match_prefix(src, "uint8_t "))) { - type = Argument::UInt8; - } else if ((name = match_prefix(src, "uint16_t "))) { - type = Argument::UInt16; - } else if ((name = match_prefix(src, "uint32_t "))) { - type = Argument::UInt32; - } - if (type == Argument::Void) { - error(user_context) << "Internal error: argument type not supported"; - return nullptr; - } - - Argument *arg = (Argument *)malloc(sizeof(Argument)); - arg->name = strndup(name, end - name); - arg->type = type; - arg->kind = Argument::Invalid; - arg->next = nullptr; - return arg; -} - -// Create KernelInfo for a piece of GLSL code -WEAK KernelInfo *create_kernel(void *user_context, const char *src, int size) { - KernelInfo *kernel = (KernelInfo *)malloc(sizeof(KernelInfo)); - - kernel->source = strndup(src, size); - kernel->arguments = nullptr; - kernel->program_id = 0; - - debug(user_context) << "Compiling GLSL kernel (size = " << size << "):\n"; - - // Parse initial comment block - const char *line = kernel->source; - while (*line) { - const char *next_line = strchr(line, '\n') + 1; - if (!next_line) { - next_line = line + size; - } - - const char *args; - if ((args = match_prefix(line, kernel_marker))) { - // set name - kernel->name = strstrip(strndup(args, next_line - args), next_line - args); - } else if ((args = match_prefix(line, uniform_marker))) { - if (Argument *arg = - parse_argument(user_context, args, next_line - 1)) { - arg->kind = 
Argument::Uniform; - arg->next = kernel->arguments; - kernel->arguments = arg; - } else { - halide_error(user_context, "Invalid VAR marker"); - goto error; - } - } else if ((args = match_prefix(line, varying_marker))) { - if (Argument *arg = - parse_argument(user_context, args, next_line - 1)) { - arg->kind = Argument::Varying; - arg->next = kernel->arguments; - kernel->arguments = arg; - } else { - halide_error(user_context, "Invalid VARYING marker"); - goto error; - } - } else if ((args = match_prefix(line, input_marker))) { - if (Argument *arg = parse_argument(user_context, args, next_line - 1)) { - arg->kind = Argument::Inbuf; - arg->next = kernel->arguments; - kernel->arguments = arg; - } else { - error(user_context) << "Invalid IN_BUFFER marker"; - goto error; - } - } else if ((args = match_prefix(line, output_marker))) { - if (Argument *arg = parse_argument(user_context, args, next_line - 1)) { - arg->kind = Argument::Outbuf; - arg->next = kernel->arguments; - kernel->arguments = arg; - } else { - error(user_context) << "Invalid OUT_BUFFER marker"; - goto error; - } - } else { - // Stop parsing if we encounter something we don't recognize - break; - } - line = next_line; - } - - // Arguments are currently in reverse order, flip the list. - { - Argument *cur = kernel->arguments; - kernel->arguments = nullptr; - while (cur) { - Argument *next = cur->next; - cur->next = kernel->arguments; - kernel->arguments = cur; - cur = next; - } - } - - return kernel; -error: - free(kernel); - return nullptr; -} - -// Delete all data associated with a kernel. Also release associated OpenGL -// shader and program. -WEAK void delete_kernel(void *user_context, KernelInfo *kernel) { - global_state.DeleteProgram(kernel->program_id); -#if 0 // TODO figure out why this got deleted. 
- global_state.DeleteShader(kernel->shader_id); -#endif - - Argument *arg = kernel->arguments; - while (arg) { - Argument *next = arg->next; - free(arg->name); - free(arg); - arg = next; - } - free(kernel->source); - free(kernel->name); - free(kernel); -} - -// Vertices and their order in a triangle strip for rendering a quad -// ranging from (-1,-1) to (1,1). -WEAK GLfloat quad_vertices[] = { - -1.0f, -1.0f, 1.0f, -1.0f, - -1.0f, 1.0f, 1.0f, 1.0f}; -WEAK GLuint quad_indices[] = {0, 1, 2, 3}; - -WEAK void GlobalState::init() { - initialized = false; - profile = OpenGL; - major_version = 2; - minor_version = 0; - framebuffer_id = 0; - vertex_array_object = vertex_buffer = element_buffer = 0; - have_vertex_array_objects = false; - have_texture_rg = false; - have_texture_rgb8_rgba8 = false; - // Initialize all GL function pointers to nullptr -#define GLFUNC(type, name) name = nullptr; - USED_GL_FUNCTIONS; - OPTIONAL_GL_FUNCTIONS; -#undef GLFUNC -} - -WEAK int load_gl_func(void *user_context, const char *name, void **ptr, bool required) { - void *p = halide_opengl_get_proc_address(user_context, name); - if (!p && required) { - error(user_context) << "Could not load function pointer for " << name; - return -1; - } - *ptr = p; - return 0; -} - -WEAK bool extension_supported(void *user_context, const char *name) { - // Iterate over space delimited extension strings. Note that glGetStringi - // is not part of GL ES 2.0, and not reliable in all implementations of - // GL ES 3.0. - const char *start = (const char *)global_state.GetString(GL_EXTENSIONS); - if (!start) { - return false; - } - while (const char *pos = strstr(start, name)) { - const char *end = pos + strlen(name); - // Ensure the found match is a full word, not a substring. 
- if ((pos == start || pos[-1] == ' ') && - (*end == ' ' || *end == '\0')) { - return true; - } - start = end; - } - - return false; -} - -// Check for availability of various version- and extension-specific features -// and hook up functions pointers as necessary -WEAK void init_extensions(void *user_context) { - if (global_state.major_version >= 3) { // This is likely valid for both OpenGL and OpenGL ES - load_gl_func(user_context, "glGenVertexArrays", (void **)&global_state.GenVertexArrays, false); - load_gl_func(user_context, "glBindVertexArray", (void **)&global_state.BindVertexArray, false); - load_gl_func(user_context, "glDeleteVertexArrays", (void **)&global_state.DeleteVertexArrays, false); - if (global_state.GenVertexArrays && global_state.BindVertexArray && global_state.DeleteVertexArrays) { - global_state.have_vertex_array_objects = true; - } - } - load_gl_func(user_context, "glDrawBuffers", (void **)&global_state.DrawBuffers, false); - - global_state.have_texture_rg = - global_state.major_version >= 3 || - (global_state.profile == OpenGL && - extension_supported(user_context, "GL_ARB_texture_rg")) || - (global_state.profile == OpenGLES && - extension_supported(user_context, "GL_EXT_texture_rg")); - - global_state.have_texture_rgb8_rgba8 = - global_state.major_version >= 3 || - (global_state.profile == OpenGLES && - extension_supported(user_context, "GL_OES_rgb8_rgba8")); - - global_state.have_texture_float = - (global_state.major_version >= 3) || - (global_state.profile == OpenGL && - extension_supported(user_context, "GL_ARB_texture_float")) || - (global_state.profile == OpenGLES && - extension_supported(user_context, "GL_OES_texture_float")); -} - -WEAK const char *parse_int(const char *str, int *val) { - int v = 0; - size_t i = 0; - while (str[i] >= '0' && str[i] <= '9') { - v = 10 * v + (str[i] - '0'); - i++; - } - if (i > 0) { - *val = v; - return &str[i]; - } - return nullptr; -} - -WEAK const char *parse_opengl_version(const char *str, int 
*major, int *minor) { - str = parse_int(str, major); - if (str == nullptr || *str != '.') { - return nullptr; - } - return parse_int(str + 1, minor); -} - -// Initialize the OpenGL-specific parts of the runtime. -WEAK int halide_opengl_init(void *user_context) { - if (global_state.initialized) { - return 0; - } - -#ifdef DEBUG_RUNTIME - halide_start_clock(user_context); -#endif - - global_state.init(); - - // Make a context if there isn't one - if (halide_opengl_create_context(user_context)) { - error(user_context) << "Failed to make OpenGL context"; - return -1; - } - - // Initialize pointers to core OpenGL functions. -#define GLFUNC(TYPE, VAR) \ - if (load_gl_func(user_context, "gl" #VAR, (void **)&global_state.VAR, true) < 0) { \ - return -1; \ - } - USED_GL_FUNCTIONS; -#undef GLFUNC - - const char *version = (const char *)global_state.GetString(GL_VERSION); - const char *gles_version = match_prefix(version, "OpenGL ES "); - int major, minor; - if (gles_version && parse_opengl_version(gles_version, &major, &minor)) { - global_state.profile = OpenGLES; - global_state.major_version = major; - global_state.minor_version = minor; - } else if (parse_opengl_version(version, &major, &minor)) { - global_state.profile = OpenGL; - global_state.major_version = major; - global_state.minor_version = minor; - } else { - global_state.profile = OpenGL; - global_state.major_version = 2; - global_state.minor_version = 0; - } - init_extensions(user_context); - debug(user_context) - << "Halide running on OpenGL " << ((global_state.profile == OpenGL) ? "" : "ES ") << major << "." << minor << "\n" - << " vertex_array_objects: " << (global_state.have_vertex_array_objects ? "yes\n" : "no\n") - << " texture_rg: " << (global_state.have_texture_rg ? "yes\n" : "no\n") - << " have_texture_rgb8_rgba8: " << (global_state.have_texture_rgb8_rgba8 ? "yes\n" : "no\n") - << " texture_float: " << (global_state.have_texture_float ? "yes\n" : "no\n"); - - // Initialize framebuffer. 
- global_state.GenFramebuffers(1, &global_state.framebuffer_id); - if (global_state.CheckAndReportError(user_context, "halide_opengl_init GenFramebuffers")) { - return 1; - } - - // Initialize vertex and element buffers. - GLuint buf[2]; - global_state.GenBuffers(2, buf); - global_state.BindBuffer(GL_ARRAY_BUFFER, buf[0]); - global_state.BufferData(GL_ARRAY_BUFFER, sizeof(quad_vertices), quad_vertices, GL_STATIC_DRAW); - global_state.BindBuffer(GL_ARRAY_BUFFER, 0); - global_state.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, buf[1]); - global_state.BufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(quad_indices), quad_indices, GL_STATIC_DRAW); - global_state.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); - global_state.vertex_buffer = buf[0]; - global_state.element_buffer = buf[1]; - - if (global_state.have_vertex_array_objects) { - global_state.GenVertexArrays(1, &global_state.vertex_array_object); - if (global_state.CheckAndReportError(user_context, "halide_opengl_init GenVertexArrays")) { - return 1; - } - } - - global_state.initialized = true; - return 0; -} - -// Release all data allocated by the runtime. -// -// The OpenGL context itself is generally managed by the host application, so -// we leave it untouched. -WEAK int halide_opengl_device_release(void *user_context) { - if (!global_state.initialized) { - return 0; - } - - debug(user_context) << "halide_opengl_release\n"; - global_state.DeleteFramebuffers(1, &global_state.framebuffer_id); - - ModuleState *mod = state_list; - while (mod) { - delete_kernel(user_context, mod->kernel); - mod->kernel = nullptr; - ModuleState *next = mod->next; - // do not call free(mod) to avoid dangling pointers: the module state - // is still referenced in the code generated by Halide (see - // CodeGen_GPU_Host::get_module_state). 
- mod = next; - } - - global_state.DeleteBuffers(1, &global_state.vertex_buffer); - global_state.DeleteBuffers(1, &global_state.element_buffer); - if (global_state.have_vertex_array_objects) { - global_state.DeleteVertexArrays(1, &global_state.vertex_array_object); - } - - global_state = GlobalState(); - - return 0; -} - -// Determine OpenGL texture format and channel type for a given halide_buffer_t. -WEAK bool get_texture_format(void *user_context, halide_buffer_t *buf, - GLint *internal_format, GLint *format, GLint *type) { - if (buf->type == halide_type_of()) { - *type = GL_UNSIGNED_BYTE; - } else if (buf->type == halide_type_of()) { - *type = GL_UNSIGNED_SHORT; - } else if (buf->type == halide_type_of()) { - *type = GL_FLOAT; - } else { - error(user_context) << "OpenGL: Only uint8, uint16, and float textures are supported."; - return false; - } - - const int channels = (buf->dimensions > 2) ? buf->dim[2].extent : 0; - - // GL_LUMINANCE and GL_LUMINANCE_ALPHA aren't color-renderable in ES2, period, - // thus can't be read back via ReadPixels, thus are nearly useless to us. - // GL_RED and GL_RG are technically optional in ES2 (required in ES3), - // but as a practical matter, they are supported on pretty much every recent device - // (iOS: everything >= iPhone 4s; Android: everything >= 4.3 plus various older devices). - // This is definitely suboptimal; the only real alternative would be to implement - // these as GL_RGB or GL_RGBA, ignoring the extra channels. 
- if (channels <= 2 && !global_state.have_texture_rg) { - error(user_context) << "OpenGL: 1 and 2 channel textures are not supported for this version of OpenGL."; - return false; - } - - // Common formats supported by both GLES 2.0 and GL 2.1 are selected below - // - switch (channels) { - case 0: - case 1: - *format = GL_RED; - break; - case 2: - *format = GL_RG; - break; - case 3: - *format = GL_RGB; - break; - case 4: - *format = GL_RGBA; - break; - default: - error(user_context) << "OpenGL: Invalid number of color channels: " << channels; - return false; - } - - switch (global_state.profile) { - case OpenGLES: - // For OpenGL ES, the texture format has to match the pixel format - // since there no conversion is performed during texture transfers. - // See OES_texture_float. - *internal_format = *format; - break; - case OpenGL: - // For desktop OpenGL, the internal format specifiers include the - // precise data type, see ARB_texture_float. - if (*type == GL_FLOAT) { - switch (*format) { - case GL_RED: - case GL_RG: - case GL_RGB: - case GL_RGBA: - *internal_format = GL_RGBA32F; - break; - default: - error(user_context) << "OpenGL: Cannot select internal format for format " << *format; - return false; - } - } else { - *internal_format = *format; - } - break; - } - - return true; -} - -// This function returns the width, height and number of color channels that the -// texture for the specified halide_buffer_t will contain. It provides a single place -// to implement the logic snapping zero sized dimensions to one element. 
-WEAK bool get_texture_dimensions(void *user_context, halide_buffer_t *buf, GLint *width, - GLint *height, GLint *channels) { - if (buf->dimensions > 3) { - error(user_context) << "The GL backend supports buffers of at most 3 dimensions\n"; - return false; - } - - *width = buf->dim[0].extent; - if (*width == 0) { - error(user_context) << "Invalid dim[0].extent: " << *width << "\n"; - return false; - } - - // GLES 2.0 supports GL_TEXTURE_2D (plus cube map), but not 1d or 3d. If we - // end up with a buffer that has a zero extent, set the corresponding size - // to one. - *height = (buf->dimensions > 1) ? buf->dim[1].extent : 1; - *channels = (buf->dimensions > 2) ? buf->dim[2].extent : 1; - - return true; -} - -// Allocate a new texture matching the dimension and color format of the -// specified buffer. -WEAK int halide_opengl_device_malloc(void *user_context, halide_buffer_t *buf) { - if (int error = halide_opengl_init(user_context)) { - return error; - } - - if (!buf) { - error(user_context) << "Invalid buffer"; - return 1; - } - - // If the texture was already created by the host application, check that - // it has the correct format. Otherwise, allocate and set up an - // appropriate texture. - GLuint tex = 0; - bool halide_allocated = false; - - if (buf->device) { -#ifdef HAVE_GLES3 - // Look up the width and the height from the existing texture. Note that - // glGetTexLevelParameteriv does not support GL_TEXTURE_WIDTH or - // GL_TEXTURE_HEIGHT in GLES 2.0 - GLint width, height; - global_state.BindTexture(GL_TEXTURE_2D, tex); - global_state.GetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &width); - global_state.GetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &height); - if (global_state.CheckAndReportError(user_context, "halide_opengl_device_malloc binding texture (GLES3)")) { - return 1; - } - if (width < buf->dim[0].extent || height < buf->dim[1].extent) { - error(user_context) - << "Existing texture is smaller than buffer. 
" - << "Texture size: " << width << "x" << height - << ", buffer size: " << buf->dim[0].extent << "x" << buf->dim[1].extent; - return 1; - } -#endif - uint64_t handle = buf->device; - tex = (handle == HALIDE_OPENGL_RENDER_TARGET) ? 0 : (GLuint)handle; - } else { - if (buf->dimensions > 3) { - error(user_context) << "high-dimensional textures are not supported"; - return 1; - } - - // Generate texture ID - global_state.GenTextures(1, &tex); - if (global_state.CheckAndReportError(user_context, "halide_opengl_device_malloc GenTextures")) { - global_state.DeleteTextures(1, &tex); - return 1; - } - - // Set parameters for this texture: no interpolation and clamp to edges. - global_state.BindTexture(GL_TEXTURE_2D, tex); - global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - if (global_state.CheckAndReportError(user_context, "halide_opengl_device_malloc binding texture")) { - global_state.DeleteTextures(1, &tex); - return 1; - } - - // Create empty texture here and fill it with glTexSubImage2D later. 
- GLint internal_format, format, type; - if (!get_texture_format(user_context, buf, &internal_format, &format, &type)) { - error(user_context) << "Invalid texture format"; - global_state.DeleteTextures(1, &tex); - return 1; - } - - GLint width, height, channels; - if (!get_texture_dimensions(user_context, buf, &width, &height, &channels)) { - error(user_context) << "Invalid texture dimensions"; - return 1; - } - - global_state.TexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, type, nullptr); - if (global_state.CheckAndReportError(user_context, "halide_opengl_device_malloc TexImage2D")) { - global_state.DeleteTextures(1, &tex); - return 1; - } - - buf->device = tex; - buf->device_interface = &opengl_device_interface; - buf->device_interface->impl->use_module(); - halide_allocated = true; - debug(user_context) << "Allocated texture " << tex - << " of size " << width << " x " << height << "\n"; - - global_state.BindTexture(GL_TEXTURE_2D, 0); - } - - return 0; -} - -// Delete all texture information associated with a buffer. -WEAK int halide_opengl_device_free(void *user_context, halide_buffer_t *buf) { - if (!global_state.initialized) { - error(user_context) << "OpenGL runtime not initialized in call to halide_opengl_device_free."; - return 1; - } - - if (buf->device == 0) { - return 0; - } - - uint64_t handle = buf->device; - GLuint tex = (handle == HALIDE_OPENGL_RENDER_TARGET) ? 0 : (GLuint)handle; - - int result = 0; - debug(user_context) << "halide_opengl_device_free: Deleting texture " << tex << "\n"; - global_state.DeleteTextures(1, &tex); - if (global_state.CheckAndReportError(user_context, "halide_opengl_device_free DeleteTextures")) { - result = 1; - // do not return: we want to zero out the interface and - // device fields even if we can't delete the texture. 
- } - buf->device = 0; - buf->device_interface->impl->release_module(); - buf->device_interface = nullptr; - - return result; -} - -// Can't use std::min, std::max in Halide runtime. -template -ALWAYS_INLINE T std_min(T a, T b) { - return (a < b) ? a : b; -} -template -ALWAYS_INLINE T std_max(T a, T b) { - return (a > b) ? a : b; -} - -// This method copies image data from the layout specified by the strides of the -// halide_buffer_t to the packed interleaved format needed by GL. It is assumed that -// src and dst have the same number of channels. -template -ALWAYS_INLINE void halide_to_interleaved(const halide_buffer_t *src_buf, T *dst) { - const T *src = reinterpret_cast(src_buf->host); - int width = (src_buf->dimensions > 0) ? src_buf->dim[0].extent : 1; - int height = (src_buf->dimensions > 1) ? src_buf->dim[1].extent : 1; - int channels = (src_buf->dimensions > 2) ? src_buf->dim[2].extent : 1; - int x_stride = (src_buf->dimensions > 0) ? src_buf->dim[0].stride : 0; - int y_stride = (src_buf->dimensions > 1) ? src_buf->dim[1].stride : 0; - int c_stride = (src_buf->dimensions > 2) ? src_buf->dim[2].stride : 0; - for (int y = 0; y < height; y++) { - int dstidx = y * width * channels; - for (int x = 0; x < width; x++) { - int srcidx = y * y_stride + x * x_stride; - for (int c = 0; c < channels; c++) { - dst[dstidx] = src[srcidx]; - srcidx += c_stride; - dstidx += 1; - } - } - } -} - -// This method copies image data from the packed interleaved format needed by GL -// to the arbitrary strided layout specified by the halide_buffer_t. If src has fewer -// channels than dst, the excess in dst will be left untouched; if src has -// more channels than dst, the excess will be ignored. -template -ALWAYS_INLINE void interleaved_to_halide(void *user_context, const T *src, int src_channels, halide_buffer_t *dst_buf) { - T *dst = reinterpret_cast(dst_buf->host); - int width = (dst_buf->dimensions > 0) ? dst_buf->dim[0].extent : 1; - int height = (dst_buf->dimensions > 1) ? 
dst_buf->dim[1].extent : 1; - int dst_channels = (dst_buf->dimensions > 2) ? dst_buf->dim[2].extent : 1; - int x_stride = (dst_buf->dimensions > 0) ? dst_buf->dim[0].stride : 0; - int y_stride = (dst_buf->dimensions > 1) ? dst_buf->dim[1].stride : 0; - int c_stride = (dst_buf->dimensions > 2) ? dst_buf->dim[2].stride : 0; - int src_skip = std_max(0, src_channels - dst_channels); - int channels = std_min(src_channels, dst_channels); - - for (int y = 0; y < height; y++) { - int srcidx = y * width * src_channels; - for (int x = 0; x < width; x++) { - int dstidx = y * y_stride + x * x_stride; - for (int c = 0; c < channels; c++) { - dst[dstidx] = src[srcidx]; - srcidx += 1; - dstidx += c_stride; - } - srcidx += src_skip; - } - } -} - -// Copy image data from host memory to texture. -WEAK int halide_opengl_copy_to_device(void *user_context, halide_buffer_t *buf) { - if (!global_state.initialized) { - error(user_context) << "OpenGL runtime not initialized (halide_opengl_copy_to_device)."; - return 1; - } - - GLStateSaver state_saver; - - int err = halide_opengl_device_malloc(user_context, buf); - if (err) { - return err; - } - - if (!buf->host || !buf->device) { - debug_buffer(user_context, buf); - error(user_context) << "Invalid copy_to_device operation: host or device nullptr"; - return 1; - } - - uint64_t handle = buf->device; - if (handle == HALIDE_OPENGL_RENDER_TARGET) { - // TODO: this isn't correct; we want to ensure we copy to the current render_target. 
- debug(user_context) << "halide_opengl_copy_to_device: called for HALIDE_OPENGL_RENDER_TARGET\n"; - return 0; - } - GLuint tex = (GLuint)handle; - debug(user_context) << "halide_opengl_copy_to_device: " << tex << "\n"; - - global_state.BindTexture(GL_TEXTURE_2D, tex); - if (global_state.CheckAndReportError(user_context, "halide_opengl_copy_to_device BindTexture")) { - return 1; - } - GLint internal_format, format, type; - if (!get_texture_format(user_context, buf, &internal_format, &format, &type)) { - error(user_context) << "Invalid texture format"; - return 1; - } - - GLint width, height, buffer_channels; - if (!get_texture_dimensions(user_context, buf, &width, &height, &buffer_channels)) { - error(user_context) << "Invalid texture dimensions"; - return 1; - } - - // To use TexSubImage2D directly, the colors must be stored interleaved - // and rows must be stored consecutively. - // (Single-channel buffers are "interleaved" for our purposes here.) - bool is_interleaved = (buffer_channels == 1) || (buf->dim[2].stride == 1 && buf->dim[0].stride == buf->dim[2].extent); - bool is_packed = (buf->dim[1].stride == buf->dim[0].extent * buf->dim[0].stride); - if (is_interleaved && is_packed) { - global_state.PixelStorei(GL_UNPACK_ALIGNMENT, 1); - global_state.TexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, type, buf->host); - if (global_state.CheckAndReportError(user_context, "halide_opengl_copy_to_device TexSubImage2D(1)")) { - return 1; - } - } else { - debug(user_context) - << "Warning: In copy_to_device, host buffer is not interleaved. 
Doing slow interleave.\n"; - - size_t texture_size = width * height * buffer_channels * buf->type.bytes(); - HalideMalloc tmp(user_context, texture_size); - if (!tmp.ptr) { - error(user_context) << "halide_malloc failed inside copy_to_device"; - return -1; - } - - switch (type) { - case GL_UNSIGNED_BYTE: - halide_to_interleaved(buf, (uint8_t *)tmp.ptr); - break; - case GL_UNSIGNED_SHORT: - halide_to_interleaved(buf, (uint16_t *)tmp.ptr); - break; - case GL_FLOAT: - halide_to_interleaved(buf, (float *)tmp.ptr); - break; - } - - global_state.PixelStorei(GL_UNPACK_ALIGNMENT, 1); - global_state.TexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, type, tmp.ptr); - if (global_state.CheckAndReportError(user_context, "halide_opengl_copy_to_device TexSubImage2D(2)")) { - return 1; - } - } - - return 0; -} - -// Copy image data from texture back to host memory. -WEAK int halide_opengl_copy_to_host(void *user_context, halide_buffer_t *buf) { - if (!global_state.initialized) { - error(user_context) << "OpenGL runtime not initialized (halide_opengl_copy_to_host)."; - return 1; - } - - GLStateSaver state_saver; - - if (!buf->host || !buf->device) { - debug_buffer(user_context, buf); - error(user_context) << "Invalid copy_to_host operation: host or dev nullptr"; - return 1; - } - - GLint internal_format, format, type; - if (!get_texture_format(user_context, buf, &internal_format, &format, &type)) { - error(user_context) << "Invalid texture format"; - return 1; - } - - GLint width, height, buffer_channels; - if (!get_texture_dimensions(user_context, buf, &width, &height, &buffer_channels)) { - error(user_context) << "Invalid texture dimensions"; - return 1; - } - GLint texture_channels = buffer_channels; - - uint64_t handle = buf->device; - if (handle != HALIDE_OPENGL_RENDER_TARGET) { - GLuint tex = (GLuint)handle; - debug(user_context) << "halide_copy_to_host: texture " << tex << "\n"; - global_state.BindFramebuffer(GL_FRAMEBUFFER, global_state.framebuffer_id); - if 
(global_state.CheckAndReportError(user_context, "copy_to_host BindFramebuffer")) { - return 1; - } - global_state.FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex, 0); - if (global_state.CheckAndReportError(user_context, "copy_to_host FramebufferTexture2D")) { - return 1; - } - } else { - debug(user_context) << "halide_copy_to_host: HALIDE_OPENGL_RENDER_TARGET\n"; - } - - // Check that framebuffer is set up correctly - GLenum status = global_state.CheckFramebufferStatus(GL_FRAMEBUFFER); - if (status != GL_FRAMEBUFFER_COMPLETE) { - error(user_context) - << "Setting up GL framebuffer " << global_state.framebuffer_id << " failed " << status; - return 1; - } - - // The only format/type pairs guaranteed to be readable in GLES2 are GL_RGBA+GL_UNSIGNED_BYTE, - // plus one other implementation-dependent pair specified here. Spoiler alert: - // some ES2 implementations return that very same pair here (i.e., they don't support - // any other formats); in that case, we need to read as RGBA and manually convert to - // what we need (usually GL_RGB). - // NOTE: this requires the currently-bound Framebuffer is correct. - // TODO: short and float will require even more effort on top of this. - if (global_state.profile == OpenGLES && format == GL_RGB) { - GLint extra_format, extra_type; - global_state.GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE, &extra_type); - if (type != GL_UNSIGNED_BYTE && type != extra_type) { - error(user_context) << "ReadPixels does not support our type; we don't handle this yet.\n"; - return 1; - } - global_state.GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &extra_format); - if (format != GL_RGBA && format != extra_format) { - debug(user_context) << "ReadPixels does not support our format; falling back to GL_RGBA\n"; - format = GL_RGBA; - texture_channels = 4; - } - } - - // To download the texture directly, the colors must be stored interleaved - // and rows must be stored consecutively. 
- // (Single-channel buffers are "interleaved" for our purposes here.) - bool is_interleaved = (buffer_channels == 1) || (buf->dim[2].stride == 1 && buf->dim[0].stride == buf->dim[2].extent); - bool is_packed = (buf->dim[1].stride == buf->dim[0].extent * buf->dim[0].stride); - if (is_interleaved && is_packed && texture_channels == buffer_channels) { - global_state.PixelStorei(GL_PACK_ALIGNMENT, 1); -#ifdef DEBUG_RUNTIME - int64_t t1 = halide_current_time_ns(user_context); -#endif - global_state.ReadPixels(0, 0, buf->dim[0].extent, buf->dim[1].extent, format, type, buf->host); -#ifdef DEBUG_RUNTIME - int64_t t2 = halide_current_time_ns(user_context); -#endif - if (global_state.CheckAndReportError(user_context, "copy_to_host ReadPixels (1)")) { - return 1; - } -#ifdef DEBUG_RUNTIME - debug(user_context) << "ReadPixels(1) time: " << (t2 - t1) / 1e3 << "usec\n"; -#endif - } else { - debug(user_context) - << "Warning: In copy_to_host, host buffer is not interleaved, or not a native format. Doing slow deinterleave.\n"; - - size_t texture_size = width * height * texture_channels * buf->type.bytes(); - HalideMalloc tmp(user_context, texture_size); - if (!tmp.ptr) { - error(user_context) << "halide_malloc failed inside copy_to_host"; - return -1; - } - - global_state.PixelStorei(GL_PACK_ALIGNMENT, 1); -#ifdef DEBUG_RUNTIME - int64_t t1 = halide_current_time_ns(user_context); -#endif - global_state.ReadPixels(0, 0, buf->dim[0].extent, buf->dim[1].extent, format, type, tmp.ptr); -#ifdef DEBUG_RUNTIME - int64_t t2 = halide_current_time_ns(user_context); - debug(user_context) << "ReadPixels(2) time: " << (t2 - t1) / 1e3 << "usec\n"; -#endif - if (global_state.CheckAndReportError(user_context, "copy_to_host ReadPixels (2)")) { - return 1; - } - - // Premature optimization warning: interleaved_to_halide() could definitely - // be optimized, but ReadPixels() typically takes ~2-10x as long (especially on - // mobile devices), so the returns will be modest. 
-#ifdef DEBUG_RUNTIME - int64_t t3 = halide_current_time_ns(user_context); -#endif - switch (type) { - case GL_UNSIGNED_BYTE: - interleaved_to_halide(user_context, (uint8_t *)tmp.ptr, texture_channels, buf); - break; - case GL_UNSIGNED_SHORT: - interleaved_to_halide(user_context, (uint16_t *)tmp.ptr, texture_channels, buf); - break; - case GL_FLOAT: - interleaved_to_halide(user_context, (float *)tmp.ptr, texture_channels, buf); - break; - } -#ifdef DEBUG_RUNTIME - int64_t t4 = halide_current_time_ns(user_context); - debug(user_context) << "deinterleave time: " << (t4 - t3) / 1e3 << "usec\n"; -#endif - } - - return 0; -} - -} // namespace OpenGL -} // namespace Internal -} // namespace Runtime -} // namespace Halide - -using namespace Halide::Runtime::Internal::OpenGL; - -// Find the correct module for the called function -// TODO: This currently takes O(# of GLSL'd stages) and can -// be optimized -WEAK ModuleState *find_module(const char *stage_name) { - ModuleState *state_ptr = state_list; - - while (state_ptr != nullptr) { - KernelInfo *kernel = state_ptr->kernel; - if (kernel && strcmp(stage_name, kernel->name) == 0) { - return state_ptr; - } - state_ptr = state_ptr->next; - } - - return nullptr; -} - -// Create wrappers that satisfy old naming conventions - -extern "C" { - -WEAK int halide_opengl_run(void *user_context, - void *state_ptr, - const char *entry_name, - int blocksX, int blocksY, int blocksZ, - int threadsX, int threadsY, int threadsZ, - int shared_mem_bytes, - size_t arg_sizes[], void *args[], int8_t is_buffer[], - int num_padded_attributes, - float *vertex_buffer, - int num_coords_dim0, - int num_coords_dim1) { - if (!global_state.initialized) { - error(user_context) << "OpenGL runtime not initialized (halide_opengl_run)."; - return 1; - } - - GLStateSaver state_saver; - - // Find the right module - ModuleState *mod = find_module(entry_name); - if (!mod) { - error(user_context) << "Internal error: module state for stage " << entry_name << " not 
found\n"; - return 1; - } - - KernelInfo *kernel = mod->kernel; - - global_state.UseProgram(kernel->program_id); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run UseProgram")) { - return 1; - } - - // TODO(abstephensg) it would be great to codegen these vec4 uniform buffers - // directly, instead of passing an array of arguments and then copying them - // out at runtime. - - // Determine the number of float and int uniform parameters. This code - // follows the argument packing convention in CodeGen_GPU_Host and - // CodeGen_OpenGL_Dev - int num_uniform_floats = 0; - int num_uniform_ints = 0; - - Argument *kernel_arg = kernel->arguments; - for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) { - - // Check for a mismatch between the number of arguments declared in the - // fragment shader source header and the number passed to this function - if (!kernel_arg) { - error(user_context) - << "Too many arguments passed to halide_opengl_run\n" - << "Argument " << i << ": size=" << i << " value=" << args[i]; - return 1; - } - - // Count the number of float and int uniform parameters. 
- if (kernel_arg->kind == Argument::Uniform) { - switch (kernel_arg->type) { - case Argument::Float: - // Integer parameters less than 32 bits wide are passed as - // normalized float values - case Argument::Int8: - case Argument::UInt8: - case Argument::Int16: - case Argument::UInt16: - ++num_uniform_floats; - break; - case Argument::Bool: - case Argument::Int32: - case Argument::UInt32: - ++num_uniform_ints; - break; - default: - error(user_context) << "GLSL: Encountered invalid kernel argument type"; - return 1; - } - } - } - - // Pad up to a multiple of four - int num_padded_uniform_floats = (num_uniform_floats + 0x3) & ~0x3; - int num_padded_uniform_ints = (num_uniform_ints + 0x3) & ~0x3; - - // Allocate storage for the packed arguments - float uniform_float[num_padded_uniform_floats]; - int uniform_int[num_padded_uniform_ints]; - - bool bind_render_targets = true; - - // Copy input arguments to corresponding GLSL uniforms. - GLint num_active_textures = 0; - int uniform_float_idx = 0; - int uniform_int_idx = 0; - - kernel_arg = kernel->arguments; - for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) { - - if (kernel_arg->kind == Argument::Outbuf) { - halide_assert(user_context, is_buffer[i] && "OpenGL Outbuf argument is not a buffer."); - // Check if the output buffer will be bound by the client instead of - // the Halide runtime - uint64_t handle = ((halide_buffer_t *)args[i])->device; - if (!handle) { - error(user_context) << "GLSL: Encountered invalid nullptr dev pointer"; - return 1; - } - if (handle == HALIDE_OPENGL_RENDER_TARGET) { - bind_render_targets = false; - } - // Outbuf textures are handled explicitly below - continue; - } else if (kernel_arg->kind == Argument::Inbuf) { - halide_assert(user_context, is_buffer[i] && "OpenGL Inbuf argument is not a buffer."); - GLint loc = - global_state.GetUniformLocation(kernel->program_id, kernel_arg->name); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run 
GetUniformLocation(InBuf)")) { - return 1; - } - if (loc == -1) { - error(user_context) << "No sampler defined for input texture."; - return 1; - } - uint64_t handle = ((halide_buffer_t *)args[i])->device; - if (!handle) { - error(user_context) << "GLSL: Encountered invalid nullptr dev pointer"; - return 1; - } - global_state.ActiveTexture(GL_TEXTURE0 + num_active_textures); - global_state.BindTexture(GL_TEXTURE_2D, handle == HALIDE_OPENGL_RENDER_TARGET ? 0 : (GLuint)handle); - global_state.Uniform1iv(loc, 1, &num_active_textures); - - // Textures not created by the Halide runtime might not have - // parameters set, or might have had parameters set differently - global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - num_active_textures++; - // TODO: check maximum number of active textures - } else if (kernel_arg->kind == Argument::Uniform) { - // Copy the uniform parameter into the packed scalar list - // corresponding to its type. - - // Note: small integers are represented as floats in GLSL. - switch (kernel_arg->type) { - case Argument::Float: - uniform_float[uniform_float_idx++] = *(float *)args[i]; - break; - case Argument::Bool: - uniform_int[uniform_int_idx++] = *((bool *)args[i]) ? 
1 : 0; - break; - case Argument::Int8: - uniform_float[uniform_float_idx++] = *((int8_t *)args[i]); - break; - case Argument::UInt8: - uniform_float[uniform_float_idx++] = *((uint8_t *)args[i]); - break; - case Argument::Int16: { - uniform_float[uniform_float_idx++] = *((int16_t *)args[i]); - break; - } - case Argument::UInt16: { - uniform_float[uniform_float_idx++] = *((uint16_t *)args[i]); - break; - } - case Argument::Int32: { - uniform_int[uniform_int_idx++] = *((int32_t *)args[i]); - break; - } - case Argument::UInt32: { - uint32_t value = *((uint32_t *)args[i]); - if (value > 0x7fffffff) { - error(user_context) - << "OpenGL: argument '" << kernel_arg->name << "' is too large for GLint"; - return -1; - } - uniform_int[uniform_int_idx++] = static_cast(value); - break; - } - case Argument::Void: - error(user_context) << "OpenGL: Encountered invalid kernel argument type"; - return 1; - } - } - } - - if (kernel_arg) { - error(user_context) << "Too few arguments passed to halide_opengl_run"; - return 1; - } - - // Set the packed uniform int parameters - for (int idx = 0; idx != num_padded_uniform_ints; idx += 4) { - - // Produce the uniform parameter name without using the std library. - Printer name(user_context); - name << "_uniformi" << (idx / 4); - - GLint loc = global_state.GetUniformLocation(kernel->program_id, name.str()); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run GetUniformLocation")) { - return 1; - } - if (loc == -1) { - // Argument was probably optimized away by GLSL compiler. - continue; - } - - global_state.Uniform4iv(loc, 1, &uniform_int[idx]); - } - - // Set the packed uniform float parameters - for (int idx = 0; idx != num_padded_uniform_floats; idx += 4) { - - // Produce the uniform parameter name without using the std library. 
- Printer name(user_context); - name << "_uniformf" << (idx / 4); - - GLint loc = global_state.GetUniformLocation(kernel->program_id, name.str()); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run GetUniformLocation(2)")) { - return 1; - } - if (loc == -1) { - // Argument was probably optimized away by GLSL compiler. - continue; - } - - global_state.Uniform4fv(loc, 1, &uniform_float[idx]); - } - - // Prepare framebuffer for rendering to output textures. - GLint output_min[2] = {0, 0}; - GLint output_extent[2] = {0, 0}; - - if (bind_render_targets) { - global_state.BindFramebuffer(GL_FRAMEBUFFER, global_state.framebuffer_id); - } - - global_state.Disable(GL_CULL_FACE); - global_state.Disable(GL_DEPTH_TEST); - - GLint num_output_textures = 0; - kernel_arg = kernel->arguments; - for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) { - if (kernel_arg->kind != Argument::Outbuf) { - continue; - } - - halide_assert(user_context, is_buffer[i] && "OpenGL Outbuf argument is not a buffer."); - - // TODO: GL_MAX_COLOR_ATTACHMENTS - if (num_output_textures >= 1) { - error(user_context) - << "OpenGL ES 2.0 only supports one single output texture"; - return 1; - } - - halide_buffer_t *buf = (halide_buffer_t *)args[i]; - halide_assert(user_context, buf->dimensions >= 2); - uint64_t handle = buf->device; - if (!handle) { - error(user_context) << "GLSL: Encountered invalid nullptr dev pointer"; - return 1; - } - GLuint tex = (handle == HALIDE_OPENGL_RENDER_TARGET) ? 
0 : (GLuint)handle; - - // Check to see if the object name is actually a FBO - if (bind_render_targets) { - debug(user_context) - << "Output texture " << num_output_textures << ": " << tex << "\n"; - global_state.FramebufferTexture2D(GL_FRAMEBUFFER, - GL_COLOR_ATTACHMENT0 + num_output_textures, - GL_TEXTURE_2D, tex, 0); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run FramebufferTexture2D")) { - return 1; - } - } - - output_min[0] = buf->dim[0].min; - output_min[1] = buf->dim[1].min; - output_extent[0] = buf->dim[0].extent; - output_extent[1] = buf->dim[1].extent; - num_output_textures++; - } - // TODO: GL_MAX_DRAW_BUFFERS - if (num_output_textures == 0) { - error(user_context) << "halide_opengl_run: kernel has no output\n"; - // TODO: cleanup - return 1; - } else if (num_output_textures > 1) { - if (global_state.DrawBuffers) { - HalideMalloc draw_buffers_tmp(user_context, num_output_textures * sizeof(GLenum)); - if (!draw_buffers_tmp.ptr) { - error(user_context) << "halide_malloc"; - return 1; - } - GLenum *draw_buffers = (GLenum *)draw_buffers_tmp.ptr; - for (int i = 0; i < num_output_textures; i++) { - draw_buffers[i] = GL_COLOR_ATTACHMENT0 + i; - } - global_state.DrawBuffers(num_output_textures, draw_buffers); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run DrawBuffers")) { - return 1; - } - } else { - error(user_context) << "halide_opengl_run: kernel has more than one output and DrawBuffers is not available (earlier than GL ES 3.0?).\n"; - // TODO: cleanup - return 1; - } - } - - if (bind_render_targets) { - // Check that framebuffer is set up correctly - GLenum status = global_state.CheckFramebufferStatus(GL_FRAMEBUFFER); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run CheckFramebufferStatus")) { - return 1; - } - if (status != GL_FRAMEBUFFER_COMPLETE) { - error(user_context) - << "Setting up GL framebuffer " << global_state.framebuffer_id - << " failed (" << status << ")"; - // TODO: 
cleanup - return 1; - } - } - - // Set vertex attributes - GLint loc = global_state.GetUniformLocation(kernel->program_id, "output_extent"); - global_state.Uniform2iv(loc, 1, output_extent); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run Uniform2iv(output_extent)")) { - return 1; - } - loc = global_state.GetUniformLocation(kernel->program_id, "output_min"); - global_state.Uniform2iv(loc, 1, output_min); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run Uniform2iv(output_min)")) { - return 1; - } - -#if 0 // DEBUG_RUNTIME - debug(user_context) << "output_extent: " << output_extent[0] << "," << output_extent[1] << "\n"; - debug(user_context) << "output_min: " << output_min[0] << "," << output_min[1] << "\n"; -#endif - - // TODO(abestephensg): Sort coordinate dimensions when the linear solver is integrated - // Sort the coordinates - - // Construct an element buffer using the sorted vertex order. - // Note that this is "width" and "height" of the vertices, not the output image. 
- int width = num_coords_dim0; - int height = num_coords_dim1; - - int vertex_buffer_size = width * height * num_padded_attributes; - - int element_buffer_size = (width - 1) * (height - 1) * 6; - int element_buffer[element_buffer_size]; - - int idx = 0; - for (int h = 0; h != (height - 1); ++h) { - for (int w = 0; w != (width - 1); ++w) { - - // TODO(abestephensg): Use sorted coordinates when integrated - int v = w + h * width; - element_buffer[idx++] = v; - element_buffer[idx++] = v + 1; - element_buffer[idx++] = v + width + 1; - - element_buffer[idx++] = v + width + 1; - element_buffer[idx++] = v + width; - element_buffer[idx++] = v; - } - } - -#if 0 // DEBUG_RUNTIME - debug(user_context) << "Vertex buffer:"; - for (int i=0;i!=vertex_buffer_size;++i) { - if (!(i%num_padded_attributes)) { - debug(user_context) << "\n"; - } - debug(user_context) << vertex_buffer[i] << " "; - } - debug(user_context) << "\n"; - debug(user_context) << "\n"; - - debug(user_context) << "Element buffer:"; - for (int i=0;i!=element_buffer_size;++i) { - if (!(i%3)) { - debug(user_context) << "\n"; - } - debug(user_context) << element_buffer[i] << " "; - } - debug(user_context) << "\n"; -#endif - - // Setup viewport - global_state.Viewport(0, 0, output_extent[0], output_extent[1]); - - // Setup the vertex and element buffers - GLuint vertex_array_object = 0; - if (global_state.have_vertex_array_objects) { - global_state.GenVertexArrays(1, &vertex_array_object); - global_state.BindVertexArray(vertex_array_object); - } - - GLuint vertex_buffer_id; - global_state.GenBuffers(1, &vertex_buffer_id); - global_state.BindBuffer(GL_ARRAY_BUFFER, vertex_buffer_id); - global_state.BufferData(GL_ARRAY_BUFFER, sizeof(float) * vertex_buffer_size, vertex_buffer, GL_STATIC_DRAW); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run vertex BufferData et al")) { - return 1; - } - - GLuint element_buffer_id; - global_state.GenBuffers(1, &element_buffer_id); - 
global_state.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, element_buffer_id); - global_state.BufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(float) * element_buffer_size, element_buffer, GL_STATIC_DRAW); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run element BufferData et al")) { - return 1; - } - - // The num_padded_attributes argument is the number of vertex attributes, - // including the spatial x and y coordinates, padded up to a multiple of - // four so that the attributes may be packed into vec4 slots. - int num_packed_attributes = num_padded_attributes / 4; - - // Set up the per vertex attributes - GLint attrib_ids[num_packed_attributes]; - - for (int i = 0; i != num_packed_attributes; i++) { - - // The attribute names can synthesized by the runtime based on the - // number of packed varying attributes - Printer attribute_name(user_context); - attribute_name << "_varyingf" << i << "_attrib"; - - // TODO(abstephensg): Switch to glBindAttribLocation - GLint attrib_id = global_state.GetAttribLocation(kernel->program_id, attribute_name.buf); - attrib_ids[i] = attrib_id; - - // Check to see if the varying attribute was simplified out of the - // program by the GLSL compiler. 
- if (attrib_id == -1) { - continue; - } - - global_state.VertexAttribPointer(attrib_id, 4, GL_FLOAT, GL_FALSE /* Normalized */, sizeof(GLfloat) * num_padded_attributes, (void *)(i * sizeof(GLfloat) * 4)); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run VertexAttribPointer et al")) { - return 1; - } - - global_state.EnableVertexAttribArray(attrib_id); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run EnableVertexAttribArray et al")) { - return 1; - } - } - - // Draw the scene - global_state.DrawElements(GL_TRIANGLES, element_buffer_size, GL_UNSIGNED_INT, nullptr); - if (global_state.CheckAndReportError(user_context, "halide_opengl_run DrawElements et al")) { - return 1; - } - - // Cleanup - if (global_state.have_vertex_array_objects) { - global_state.DeleteVertexArrays(1, &vertex_array_object); - } - - global_state.DeleteBuffers(1, &vertex_buffer_id); - global_state.DeleteBuffers(1, &element_buffer_id); - - return 0; -} - -WEAK int halide_opengl_device_sync(void *user_context, struct halide_buffer_t *) { - if (!global_state.initialized) { - error(user_context) << "OpenGL runtime not initialized (halide_opengl_device_sync)."; - return 1; - } -#ifdef DEBUG_RUNTIME - int64_t t0 = halide_current_time_ns(user_context); -#endif - global_state.Finish(); -#ifdef DEBUG_RUNTIME - int64_t t1 = halide_current_time_ns(user_context); - debug(user_context) << "halide_opengl_device_sync: took " << (t1 - t0) / 1e3 << "usec\n"; -#endif - return 0; -} - -// Called at the beginning of a code block generated by Halide. This function -// is responsible for setting up the OpenGL environment and compiling the GLSL -// code into a fragment shader. 
-WEAK int halide_opengl_initialize_kernels(void *user_context, void **state_ptr, - const char *src, int size) { - debug(user_context) << "In initialize_kernels\n"; - - if (int error = halide_opengl_init(user_context)) { - return error; - } - - const char *this_kernel = src; - - ModuleState **state = (ModuleState **)state_ptr; - ModuleState *module = *state; - - while (this_kernel) { - // Find the start of the next kernel - const char *next_kernel = strstr(this_kernel + 1, kernel_marker); - - // Use that to compute the length of this kernel - int len = 0; - if (!next_kernel) { - len = strlen(this_kernel); - } else { - len = next_kernel - this_kernel; - } - - // Construct a new ModuleState and add it to the global list - module = (ModuleState *)malloc(sizeof(ModuleState)); - module->kernel = nullptr; - module->next = state_list; - state_list = module; - *state = module; - - KernelInfo *kernel = module->kernel; - if (!kernel) { - kernel = create_kernel(user_context, this_kernel, len); - if (!kernel) { - error(user_context) << "Invalid kernel: " << this_kernel; - return -1; - } - module->kernel = kernel; - } - - // Create the vertex shader. The runtime will output boilerplate for the - // vertex shader based on a fixed program plus arguments obtained from - // the comment header passed in the fragment shader. Since there are a - // relatively small number of vertices (i.e. usually only four), per-vertex - // expressions interpolated by varying attributes are evaluated - // by host code on the CPU and passed to the GPU as values in the - // vertex buffer. - enum { PrinterLength = 1024 * 4 }; - Printer vertex_src(user_context); - - // Count the number of varying attributes, this is 2 for the spatial - // x and y coordinates, plus the number of scalar varying attribute - // expressions pulled out of the fragment shader. 
- int num_varying_float = 2; - - for (Argument *arg = kernel->arguments; arg; arg = arg->next) { - if (arg->kind == Argument::Varying) { - ++num_varying_float; - } - } - - int num_packed_varying_float = ((num_varying_float + 3) & ~0x3) / 4; - - for (int i = 0; i != num_packed_varying_float; ++i) { - vertex_src << "attribute vec4 _varyingf" << i << "_attrib;\n"; - vertex_src << "varying vec4 _varyingf" << i << ";\n"; - } - - vertex_src << "uniform ivec2 output_min;\n" - << "uniform ivec2 output_extent;\n" - << "void main() {\n" - - // Host codegen always passes the spatial vertex coordinates - // in the first two elements of the _varyingf0_attrib - << " vec2 position = vec2(_varyingf0_attrib[0], _varyingf0_attrib[1]);\n" - << " gl_Position = vec4(position, 0.0, 1.0);\n" - << " vec2 texcoord = 0.5 * position + 0.5;\n" - << " vec2 pixcoord = texcoord * vec2(output_extent.xy) + vec2(output_min.xy);\n"; - - // Copy through all of the varying attributes - for (int i = 0; i != num_packed_varying_float; ++i) { - vertex_src << " _varyingf" << i << " = _varyingf" << i << "_attrib;\n"; - } - - vertex_src << " _varyingf0.xy = pixcoord;\n"; - - vertex_src << "}\n"; - - // Check to see if there was sufficient storage for the vertex program. - if (vertex_src.size() >= PrinterLength) { - error(user_context) << "Vertex shader source truncated"; - return 1; - } - - // Initialize vertex shader. 
- GLuint vertex_shader_id = make_shader(user_context, - GL_VERTEX_SHADER, vertex_src.buf, nullptr); - if (vertex_shader_id == 0) { - halide_error(user_context, "Failed to create vertex shader"); - return 1; - } - - // Create the fragment shader - GLuint fragment_shader_id = make_shader(user_context, GL_FRAGMENT_SHADER, - kernel->source, nullptr); - // Link GLSL program - GLuint program = global_state.CreateProgram(); - global_state.AttachShader(program, vertex_shader_id); - global_state.AttachShader(program, fragment_shader_id); - global_state.LinkProgram(program); - - // Release the individual shaders - global_state.DeleteShader(vertex_shader_id); - global_state.DeleteShader(fragment_shader_id); - - GLint status; - global_state.GetProgramiv(program, GL_LINK_STATUS, &status); - if (!status) { - GLint log_len; - global_state.GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_len); - HalideMalloc log_tmp(user_context, log_len); - if (log_tmp.ptr) { - char *log = (char *)log_tmp.ptr; - global_state.GetProgramInfoLog(program, log_len, nullptr, log); - debug(user_context) << "Could not link GLSL program:\n" - << log << "\n"; - } - global_state.DeleteProgram(program); - return -1; - } - kernel->program_id = program; - - this_kernel = next_kernel; - } - return 0; -} - -WEAK int halide_opengl_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf) { - return halide_default_device_and_host_malloc(user_context, buf, &opengl_device_interface); -} - -WEAK int halide_opengl_device_and_host_free(void *user_context, struct halide_buffer_t *buf) { - return halide_default_device_and_host_free(user_context, buf, &opengl_device_interface); -} - -WEAK const halide_device_interface_t *halide_opengl_device_interface() { - return &opengl_device_interface; -} - -WEAK void halide_opengl_context_lost(void *user_context) { - if (!global_state.initialized) { - return; - } - - debug(user_context) << "halide_opengl_context_lost\n"; - for (ModuleState *mod = state_list; mod; mod = 
mod->next) { - // Reset program handle to force recompilation. - mod->kernel->program_id = 0; - } - - global_state.init(); -} - -WEAK int halide_opengl_wrap_texture(void *user_context, halide_buffer_t *buf, uint64_t texture_id) { - if (!global_state.initialized) { - if (int error = halide_opengl_init(user_context)) { - return error; - } - } - if (texture_id == 0) { - error(user_context) << "Texture " << texture_id << " is not a valid texture name."; - return -3; - } - halide_assert(user_context, buf->device == 0); - if (buf->device != 0) { - return -2; - } - buf->device = texture_id; - buf->device_interface = &opengl_device_interface; - buf->device_interface->impl->use_module(); - return 0; -} - -WEAK int halide_opengl_wrap_render_target(void *user_context, halide_buffer_t *buf) { - if (!global_state.initialized) { - if (int error = halide_opengl_init(user_context)) { - return error; - } - } - halide_assert(user_context, buf->device == 0); - if (buf->device != 0) { - return -2; - } - buf->device = HALIDE_OPENGL_RENDER_TARGET; - buf->device_interface = &opengl_device_interface; - buf->device_interface->impl->use_module(); - return 0; -} - -WEAK int halide_opengl_detach_texture(void *user_context, halide_buffer_t *buf) { - if (buf->device == 0) { - return 0; - } - - halide_assert(user_context, buf->device_interface == &opengl_device_interface); - buf->device = 0; - buf->device_interface->impl->release_module(); - buf->device_interface = nullptr; - return 0; -} - -WEAK uintptr_t halide_opengl_get_texture(void *user_context, halide_buffer_t *buf) { - if (buf->device == 0) { - return 0; - } - halide_assert(user_context, buf->device_interface == &opengl_device_interface); - uint64_t handle = buf->device; - // client_bound always return 0 here. - return handle == HALIDE_OPENGL_RENDER_TARGET ? 
0 : (uintptr_t)handle; -} - -namespace { -WEAK __attribute__((destructor)) void halide_opengl_cleanup() { - halide_opengl_device_release(nullptr); -} -} // namespace - -} // extern "C" - -namespace Halide { -namespace Runtime { -namespace Internal { -namespace OpenGL { - -WEAK halide_device_interface_impl_t opengl_device_interface_impl = { - halide_use_jit_module, - halide_release_jit_module, - halide_opengl_device_malloc, - halide_opengl_device_free, - halide_opengl_device_sync, - halide_opengl_device_release, - halide_opengl_copy_to_host, - halide_opengl_copy_to_device, - halide_opengl_device_and_host_malloc, - halide_opengl_device_and_host_free, - halide_default_buffer_copy, - halide_default_device_crop, - halide_default_device_slice, - halide_default_device_release_crop, - halide_opengl_wrap_texture, - halide_opengl_detach_texture}; - -WEAK halide_device_interface_t opengl_device_interface = { - halide_device_malloc, - halide_device_free, - halide_device_sync, - halide_device_release, - halide_copy_to_host, - halide_copy_to_device, - halide_device_and_host_malloc, - halide_device_and_host_free, - halide_buffer_copy, - halide_device_crop, - halide_device_slice, - halide_device_release_crop, - halide_device_wrap_native, - halide_device_detach_native, - nullptr, - &opengl_device_interface_impl}; - -} // namespace OpenGL -} // namespace Internal -} // namespace Runtime -} // namespace Halide diff --git a/src/runtime/runtime_api.cpp b/src/runtime/runtime_api.cpp index 230c907721d0..7b38a15caefe 100644 --- a/src/runtime/runtime_api.cpp +++ b/src/runtime/runtime_api.cpp @@ -7,7 +7,6 @@ #include "HalideRuntimeHexagonHost.h" #include "HalideRuntimeMetal.h" #include "HalideRuntimeOpenCL.h" -#include "HalideRuntimeOpenGL.h" #include "HalideRuntimeOpenGLCompute.h" #include "HalideRuntimeQurt.h" #include "cpu_features.h" @@ -148,16 +147,8 @@ extern "C" __attribute__((used)) void *halide_runtime_api_functions[] = { (void *)&halide_opencl_set_device_type, (void 
*)&halide_opencl_set_platform_name, (void *)&halide_opencl_wrap_cl_mem, - (void *)&halide_opengl_context_lost, (void *)&halide_opengl_create_context, - (void *)&halide_opengl_detach_texture, - (void *)&halide_opengl_device_interface, (void *)&halide_opengl_get_proc_address, - (void *)&halide_opengl_get_texture, - (void *)&halide_opengl_initialize_kernels, - (void *)&halide_opengl_run, - (void *)&halide_opengl_wrap_render_target, - (void *)&halide_opengl_wrap_texture, (void *)&halide_openglcompute_device_interface, (void *)&halide_openglcompute_initialize_kernels, (void *)&halide_openglcompute_run, diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index dee66bc4dd21..c5147af5adc7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -34,11 +34,6 @@ if (WITH_TEST_PERFORMANCE) add_subdirectory(performance) endif () -option(WITH_TEST_OPENGL "Build OpenGL tests" OFF) -if (WITH_TEST_OPENGL) - add_subdirectory(opengl) -endif () - option(WITH_TEST_GENERATOR "Build generator tests" ON) if (WITH_TEST_GENERATOR) add_subdirectory(generator) diff --git a/test/correctness/device_buffer_copy.cpp b/test/correctness/device_buffer_copy.cpp index cff35c8a11f1..c83a079eb151 100644 --- a/test/correctness/device_buffer_copy.cpp +++ b/test/correctness/device_buffer_copy.cpp @@ -214,8 +214,7 @@ int main(int argc, char **argv) { // Test copying between different device APIs. Probably will not // run on test infrastructure as we do not configure more than one // GPU API at a time. For now, special case CUDA and OpenCL as these are - // the most likely to be supported together. (OpenGL would be a candidate - // but buffer_copy support needs to be added.) + // the most likely to be supported together. 
if (target.has_feature(Target::CUDA) && target.has_feature(Target::OpenCL)) { printf("Test cross device copy device to device.\n"); { diff --git a/test/correctness/gpu_multi_device.cpp b/test/correctness/gpu_multi_device.cpp index b92a1b37ae22..b9a872f33af2 100644 --- a/test/correctness/gpu_multi_device.cpp +++ b/test/correctness/gpu_multi_device.cpp @@ -39,16 +39,6 @@ struct MultiDevicePipeline { .gpu_tile(x, y, xi, yi, 8, 8, TailStrategy::Auto, DeviceAPI::Metal); current_stage++; } - if (jit_target.has_feature(Target::OpenGL)) { - stage[current_stage](x, y, c) = stage[current_stage - 1](x, y, c) + 69; - stage[current_stage] - .compute_root() - .bound(c, 0, 3) - .reorder(c, x, y) - .glsl(x, y, c) - .vectorize(c); - current_stage++; - } if (jit_target.has_feature(Target::OpenGLCompute)) { stage[current_stage](x, y, c) = stage[current_stage - 1](x, y, c) + 69; stage[current_stage] diff --git a/test/correctness/plain_c_includes.c b/test/correctness/plain_c_includes.c index 18529c77a8fb..65a436014cbd 100644 --- a/test/correctness/plain_c_includes.c +++ b/test/correctness/plain_c_includes.c @@ -10,7 +10,6 @@ #include "HalideRuntimeHexagonHost.h" #include "HalideRuntimeMetal.h" #include "HalideRuntimeOpenCL.h" -#include "HalideRuntimeOpenGL.h" #include "HalideRuntimeOpenGLCompute.h" #include "HalideRuntimeQurt.h" diff --git a/test/correctness/target.cpp b/test/correctness/target.cpp index 64060606d0e5..7c575c5233ee 100644 --- a/test/correctness/target.cpp +++ b/test/correctness/target.cpp @@ -52,10 +52,10 @@ int main(int argc, char **argv) { // Full specification round-trip, crazy features t1 = Target(Target::Android, Target::ARM, 32, {Target::JIT, Target::SSE41, Target::AVX, Target::AVX2, - Target::CUDA, Target::OpenCL, Target::OpenGL, Target::OpenGLCompute, + Target::CUDA, Target::OpenCL, Target::OpenGLCompute, Target::Debug}); ts = t1.to_string(); - if (ts != "arm-32-android-avx-avx2-cuda-debug-jit-opencl-opengl-openglcompute-sse41") { + if (ts != 
"arm-32-android-avx-avx2-cuda-debug-jit-opencl-openglcompute-sse41") { printf("to_string failure: %s\n", ts.c_str()); return -1; } diff --git a/test/opengl/CMakeLists.txt b/test/opengl/CMakeLists.txt deleted file mode 100644 index b38c20a3a36a..000000000000 --- a/test/opengl/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -if (TARGET OpenGL::GL) - tests(GROUPS opengl - SOURCES - conv_select.cpp - copy_pixels.cpp - copy_to_device.cpp - copy_to_host.cpp - float_texture.cpp - inline_reduction.cpp - internal.cpp - lut.cpp - multiple_stages.cpp - produce.cpp - rewrap_texture.cpp - save_state.cpp - select.cpp - set_pixels.cpp - shifted_domains.cpp - special_funcs.cpp - sumcolor_reduction.cpp - sum_reduction.cpp - tuples.cpp - varying.cpp - ) - foreach (test_name IN LISTS TEST_NAMES) - target_link_libraries("${test_name}" PRIVATE OpenGL::GL) - endforeach () -endif () diff --git a/test/opengl/conv_select.cpp b/test/opengl/conv_select.cpp deleted file mode 100644 index 735c752d06f0..000000000000 --- a/test/opengl/conv_select.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// test case provided by Lee Yuguang - -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -int main() { - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - // Define the input - const int width = 10, height = 10, channels = 4, res_channels = 2; - Buffer input(width, height, channels); - input.fill([](int x, int y, int c) { - return float(x + y); - }); - - // Define the algorithm. - Var x, y, c; - RDom r(0, 2, "r"); - Func f, g; - - Expr coordx = clamp(x + r, 0, input.width() - 1); - f(x, y, c) = cast(sum(input(coordx, y, c))); - - Expr R = select(f(x, y, c) > 9.0f, 1.0f, 0.0f); - Expr G = select(f(x, y, c) > 9.0f, 0.f, 1.0f); - g(x, y, c) = mux(c, {R, G}); - - // Schedule f and g to compute in separate passes on the GPU. - g.bound(c, 0, 2).glsl(x, y, c); - - // Generate the result. 
- Buffer result = g.realize(width, height, res_channels, target); - result.copy_to_host(); - - //Check the result. - if (!Testing::check_result(result, [](int x, int y, int c) { - const float temp = ((x + y) > 4) ? 1.0f : 0.0f; - return (c == 0) ? temp : (1.0f - temp); - })) { - return 1; - } - - printf("Success!\n"); - return 0; -} diff --git a/test/opengl/copy_pixels.cpp b/test/opengl/copy_pixels.cpp deleted file mode 100644 index 97cacecd32e1..000000000000 --- a/test/opengl/copy_pixels.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -int main() { - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Buffer input(255, 10, 3); - input.fill([](int x, int y, int c) { - return 10 * x + y + c; - }); - - Var x, y, c; - Func g; - g(x, y, c) = input(x, y, c); - - Buffer out(255, 10, 3); - g.bound(c, 0, 3); - g.glsl(x, y, c); - g.realize(out, target); - out.copy_to_host(); - - if (!Testing::check_result(out, [&](int x, int y, int c) { return input(x, y, c); })) { - return 1; - } - - printf("Success!\n"); - return 0; -} diff --git a/test/opengl/copy_to_device.cpp b/test/opengl/copy_to_device.cpp deleted file mode 100644 index 0feedf5895c8..000000000000 --- a/test/opengl/copy_to_device.cpp +++ /dev/null @@ -1,38 +0,0 @@ -#include "Halide.h" - -#include "testing.h" - -using namespace Halide; - -// Test that internal allocations work correctly with copy_to_device. -// This requires that suitable halide_buffer_t objects are created internally. -int main() { - // This test must be run with an OpenGL target. 
- const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Buffer input(255, 10, 3); - input.fill([](int x, int y, int c) { - return 10 * x + y + c; - }); - - Var x, y, c; - Func g, h; - h(x, y, c) = input(x, y, c); - h.compute_root(); // force internal allocation of h - - // access h from shader to trigger copy_to_device operation - g(x, y, c) = h(x, y, c); - g.bound(c, 0, 3); - g.glsl(x, y, c); - - Buffer out(255, 10, 3); - g.realize(out, target); - out.copy_to_host(); - - if (!Testing::check_result(out, [&](int x, int y, int c) { return input(x, y, c); })) { - return 1; - } - - printf("Success!\n"); - return 0; -} diff --git a/test/opengl/copy_to_host.cpp b/test/opengl/copy_to_host.cpp deleted file mode 100644 index c03759065a9d..000000000000 --- a/test/opengl/copy_to_host.cpp +++ /dev/null @@ -1,39 +0,0 @@ -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -int main() { - // This test must be run with an OpenGL target. 
- const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Func gpu("gpu"), cpu("cpu"); - Var x, y, c; - - // Fill buffer using GLSL - gpu(x, y, c) = cast(mux(c, {10 * x + y, 127, 12})); - gpu.bound(c, 0, 3); - gpu.glsl(x, y, c); - gpu.compute_root(); - - // This should trigger a copy_to_host operation - cpu(x, y, c) = gpu(x, y, c); - - Buffer out(10, 10, 3); - cpu.realize(out, target); - - if (!Testing::check_result(out, [&](int x, int y, int c) { - switch (c) { - case 0: return 10*x+y; - case 1: return 127; - case 2: return 12; - default: return -1; - } })) { - return 1; - } - - printf("Success!\n"); - return 0; -} diff --git a/test/opengl/float_texture.cpp b/test/opengl/float_texture.cpp deleted file mode 100644 index 166863d559ea..000000000000 --- a/test/opengl/float_texture.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -int main() { - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Buffer input(255, 255, 3); - input.fill([](int x, int y, int c) { - // Note: the following values can be >1.0f to test whether - // OpenGL performs clamping operations as part of the copy - // operation. (It may do so if something other than floats - // are stored in the actual texture.) 
- return (10 * x + y + c); - }); - - Var x, y, c; - Func g; - g(x, y, c) = input(x, y, c); - - Buffer out(255, 255, 3); - g.bound(c, 0, 3); - g.glsl(x, y, c); - g.realize(out, target); - out.copy_to_host(); - - if (!Testing::check_result(out, [&](int x, int y, int c) { return input(x, y, c); })) { - return 1; - } - - printf("Success!\n"); - return 0; -} diff --git a/test/opengl/inline_reduction.cpp b/test/opengl/inline_reduction.cpp deleted file mode 100644 index 6630145e284f..000000000000 --- a/test/opengl/inline_reduction.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -int main() { - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Func f; - Var x, y, c; - RDom r(0, 10); - f(x, y, c) = sum(cast(r)); - f.bound(c, 0, 3).glsl(x, y, c); - - Buffer result = f.realize(100, 100, 3, target); - - if (!Testing::check_result(result, [&](int x, int y, int c) { return 45; })) { - return 1; - } - - printf("Success!\n"); - - return 0; -} diff --git a/test/opengl/internal.cpp b/test/opengl/internal.cpp deleted file mode 100644 index e1ce9c34ed5b..000000000000 --- a/test/opengl/internal.cpp +++ /dev/null @@ -1,10 +0,0 @@ -#include "../../src/CodeGen_OpenGL_Dev.h" - -using namespace Halide; -using namespace Halide::Internal; - -int main() { - CodeGen_GLSL::test(); - - return 0; -} diff --git a/test/opengl/lut.cpp b/test/opengl/lut.cpp deleted file mode 100644 index d51f7f1f8bf6..000000000000 --- a/test/opengl/lut.cpp +++ /dev/null @@ -1,76 +0,0 @@ - -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -// This test creates two input images and uses one to perform a dependent lookup -// into the other. - -int test_lut1d() { - - // This test must be run with an OpenGL target. 
- const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Var x("x"); - Var y("y"); - Var c("c"); - - Buffer input(8, 8, 3); - input.fill([](int x, int y, int c) { - const float v = (1.0f / 16.0f) + (float)x / 8.0f; - switch (c) { - case 0: - return (uint8_t)(v * 255.0f); - case 1: - return (uint8_t)((1.0f - v) * 255.0f); - default: - return (uint8_t)((v > 0.5 ? 1.0 : 0.0) * 255.0f); - } - }); - - // 1D Look Up Table case - Buffer lut1d(8, 1, 3); - for (int c = 0; c != 3; ++c) { - for (int i = 0; i != 8; ++i) { - lut1d(i, 0, c) = (float)(1 + i); - } - } - - Func f0("f"); - Expr e = cast(8.0f * cast(input(x, y, c)) / 255.0f); - - f0(x, y, c) = lut1d(clamp(e, 0, 7), 0, c); - - Buffer out0(8, 8, 3); - - f0.bound(c, 0, 3); - f0.glsl(x, y, c); - f0.realize(out0, target); - out0.copy_to_host(); - - if (!Testing::check_result(out0, [](int x, int y, int c) { - switch (c) { - case 0: return (float)(1 + x); - case 1: return (float)(8 - x); - case 2: return (x > 3) ? 8.0f : 1.0f; - default: return std::numeric_limits::infinity(); - } })) { - return 1; - } - - return 0; -} - -int main() { - - if (test_lut1d() == 0) { - printf("Success!\n"); - } else { - printf("FAILED\n"); - } - - return 0; -} diff --git a/test/opengl/multiple_stages.cpp b/test/opengl/multiple_stages.cpp deleted file mode 100644 index f5bac6b8b197..000000000000 --- a/test/opengl/multiple_stages.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -int main() { - - // This test must be run with an OpenGL target. 
- const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Func f, g, h; - Var x, y, c; - g(x, y, c) = cast(x); - h(x, y, c) = 1 + g(x, y, c); - f(x, y, c) = h(x, y, c) + cast(y); - f.bound(c, 0, 3).glsl(x, y, c); - h.bound(c, 0, 3).compute_root(); - g.bound(c, 0, 3).compute_root().glsl(x, y, c); - - Buffer result = f.realize(10, 10, 3, target); - result.copy_to_host(); - - if (!Testing::check_result(result, [&](int i, int j, int k) { return i + j + 1; })) { - return 1; - } - - Func f2, g2; - f2(x, y, c) = cast(x); - g2(x, y, c) = f2(x, y, c) + cast(y); - - f2.bound(c, 0, 3).glsl(x, y, c).compute_root(); - g2.bound(c, 0, 3).glsl(x, y, c); - - Buffer result2 = g2.realize(10, 10, 3, target); - if (!Testing::check_result(result2, 0.01f, [&](int i, int j, int k) { return (float)(i + j); })) { - return 1; - } - - printf("Success!\n"); - - return 0; -} diff --git a/test/opengl/produce.cpp b/test/opengl/produce.cpp deleted file mode 100644 index 002f9ec89045..000000000000 --- a/test/opengl/produce.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -// This test creates two input images and uses one to perform a dependent lookup -// into the other. The lookup table is produced using a Halide func scheduled -// on the host. - -int test_lut1d() { - - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Var x("x"); - Var y("y"); - Var c("c"); - - Buffer input(8, 8, 3); - input.fill([](int x, int y, int c) { - float v = (1.0f / 16.0f) + (float)x / 8.0f; - switch (c) { - case 0: return (uint8_t)(v * 255.0f); - case 1: return (uint8_t)((1.0f - v) * 255.0f); - default: return (uint8_t)((v > 0.5 ? 
1.0 : 0.0) * 255.0f); - } }); - - // 1D Look Up Table case - Func lut1d("lut1d"); - lut1d(x) = cast(1 + x); - - Func f0("f"); - Expr e = cast(8.0f * cast(input(x, y, c)) / 255.0f); - - f0(x, y, c) = lut1d(clamp(e, 0, 7)); - lut1d.compute_root(); - - f0.bound(c, 0, 3); - f0.glsl(x, y, c); - - Buffer out0(8, 8, 3); - f0.realize(out0, target); - - out0.copy_to_host(); - - if (!Testing::check_result(out0, [](int x, int y, int c) { - switch (c) { - case 0: return (float)(1 + x); - case 1: return (float)(8 - x); - case 2: return (x > 3) ? 8.0f : 1.0f; - default: return -1.0f; - } })) { - return 1; - } - - return 0; -} - -int main() { - - if (test_lut1d() == 0) { - printf("Success!\n"); - } else { - printf("FAILED\n"); - } - - return 0; -} diff --git a/test/opengl/rewrap_texture.cpp b/test/opengl/rewrap_texture.cpp deleted file mode 100644 index f19842338670..000000000000 --- a/test/opengl/rewrap_texture.cpp +++ /dev/null @@ -1,67 +0,0 @@ -#ifdef _WIN32 -#include -int main() { - printf("[SKIP] OpenGL on Windows is broken.\n"); - return 0; -} -#else - -#include "Halide.h" - -#include - -#if __APPLE__ -// TODO: why are these deprecated? Can we update this test? -#define GL_SILENCE_DEPRECATION -#include -#else -#include -#endif - -using namespace Halide; - -int main() { - - // This test must be run with an OpenGL target. 
- const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - const int width = 255; - const int height = 10; - - Buffer input(width, height, 3); - Buffer out1(width, height, 3); - Buffer out2(width, height, 3); - Buffer out3(width, height, 3); - - Var x, y, c; - Func g; - g(x, y, c) = input(x, y, c); - g.bound(c, 0, 3); - g.glsl(x, y, c); - - g.realize(out1, target); // run once to initialize OpenGL - - GLuint texture_id; - glGenTextures(1, &texture_id); - glBindTexture(GL_TEXTURE_2D, texture_id); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); - - // wrapping a texture should work - out2.device_wrap_native(DeviceAPI::GLSL, texture_id, target); - g.realize(out2, target); - out2.device_detach_native(); - - // re-wrapping the texture should not abort - out3.device_wrap_native(DeviceAPI::GLSL, texture_id, target); - g.realize(out3, target); - out3.device_detach_native(); - - printf("Success!\n"); - return 0; -} - -#endif diff --git a/test/opengl/save_state.cpp b/test/opengl/save_state.cpp deleted file mode 100644 index 574565775728..000000000000 --- a/test/opengl/save_state.cpp +++ /dev/null @@ -1,314 +0,0 @@ -#ifdef _WIN32 -#include -int main() { - printf("[SKIP] OpenGL on Windows is broken.\n"); - return 0; -} -#else - -#include "Halide.h" - -#include -#include -#include -#include - -#if __APPLE__ -// TODO: why are these deprecated? Can we update this test? -#define GL_SILENCE_DEPRECATION -#include -#else -#define GL_GLEXT_PROTOTYPES -#include -#endif - -// Generates an arbitrary program. 
-class Program { -public: - static GLuint handle() { - const char *vertexShader = " \ - attribute vec4 Position; \ - attribute vec2 TexCoordIn; \ - varying vec2 TexCoordOut; \ - void main(void) { \ - gl_Position = Position; \ - TexCoordOut = TexCoordIn; \ - }"; - - const char *fragmentShader = " \ - varying vec2 TexCoordOut; \ - uniform sampler2D Texture; \ - void main(void) { \ - gl_FragColor = texture2D(Texture, TexCoordOut); \ - }"; - - GLuint handle = glCreateProgram(); - glAttachShader(handle, compileShader("vertex", vertexShader, GL_VERTEX_SHADER)); - glAttachShader(handle, compileShader("fragment", fragmentShader, GL_FRAGMENT_SHADER)); - glLinkProgram(handle); - - GLint linkSuccess; - glGetProgramiv(handle, GL_LINK_STATUS, &linkSuccess); - if (linkSuccess == GL_FALSE) { - GLchar messages[256]; - glGetProgramInfoLog(handle, sizeof(messages), 0, messages); - fprintf(stderr, "Error linking program: %s\n", messages); - exit(1); - } - - return handle; - } - -private: - static GLuint compileShader(const char *label, const char *shaderString, GLenum shaderType) { - const GLuint handle = glCreateShader(shaderType); - const int len = strlen(shaderString); - glShaderSource(handle, 1, &shaderString, &len); - glCompileShader(handle); - GLint compileSuccess; - glGetShaderiv(handle, GL_COMPILE_STATUS, &compileSuccess); - if (compileSuccess == GL_FALSE) { - GLchar messages[256]; - glGetShaderInfoLog(handle, sizeof(messages), 0, messages); - fprintf(stderr, "Error compiling %s shader: %s\n", label, messages); - exit(1); - } - return handle; - } -}; - -// Encapsulates setting OpenGL's state to arbitrary values, and checking -// whether the state matches those values. -class KnownState { -private: - void gl_enable(GLenum cap, bool state) { - (state ? 
glEnable : glDisable)(cap); - } - - GLuint gl_gen(void (*fn)(GLsizei, GLuint *)) { - GLuint val; - (*fn)(1, &val); - return val; - } - - void check_value(const char *operation, const char *label, GLenum pname, GLint initial) { - GLint val; - glGetIntegerv(pname, &val); - if (val != initial) { - fprintf(stderr, "%s did not restore %s: initial value was %d (%#x), current value is %d (%#x)\n", operation, label, initial, initial, val, val); - errors = true; - } - } - - void check_value(const char *operation, const char *label, GLenum pname, GLenum initial) { - check_value(operation, label, pname, (GLint)initial); - } - - void check_value(const char *operation, const char *label, GLenum pname, GLint initial[], int n = 4) { - GLint val[2048]; - glGetIntegerv(pname, val); - for (int i = 0; i < n; i++) { - if (val[i] != initial[i]) { - fprintf(stderr, "%s did not restore %s: initial value was", operation, label); - for (int j = 0; j < n; j++) { - fprintf(stderr, " %d", initial[j]); - } - fprintf(stderr, ", current value is"); - for (int j = 0; j < n; j++) { - fprintf(stderr, " %d", val[j]); - } - fprintf(stderr, "\n"); - errors = true; - return; - } - } - } - - void check_value(const char *operation, const char *label, GLenum pname, bool initial) { - GLboolean val; - glGetBooleanv(pname, &val); - if (val != initial) { - fprintf(stderr, "%s did not restore boolean %s: initial value was %s, current value is %s\n", operation, label, initial ? "true" : "false", val ? 
"true" : "false"); - errors = true; - } - } - - void check_error(const char *label) { - GLenum err = glGetError(); - if (err != GL_NO_ERROR) { - fprintf(stderr, "Error setting %s: OpenGL error %#x\n", label, err); - errors = true; - } - } - - // version of OpenGL - int gl_major_version; - int gl_minor_version; - - GLenum initial_active_texture; - GLint initial_viewport[4]; - GLuint initial_array_buffer_binding; - GLuint initial_element_array_buffer_binding; - GLuint initial_current_program; - GLuint initial_framebuffer_binding; - static const int ntextures = 10; - GLuint initial_bound_textures[ntextures]; - bool initial_cull_face; - bool initial_depth_test; - - static const int nvertex_attribs = 10; - bool initial_vertex_attrib_array_enabled[nvertex_attribs]; - - // The next two functions are stolen from opengl.cpp - // and are used to parse the major/minor version of OpenGL - // to see if vertex array objects are supported - const char *parse_int(const char *str, int *val) { - int v = 0; - size_t i = 0; - while (str[i] >= '0' && str[i] <= '9') { - v = 10 * v + (str[i] - '0'); - i++; - } - if (i > 0) { - *val = v; - return &str[i]; - } - return nullptr; - } - - const char *parse_opengl_version(const char *str, int *major, int *minor) { - str = parse_int(str, major); - if (str == nullptr || *str != '.') { - return nullptr; - } - return parse_int(str + 1, minor); - } - - GLuint initial_vertex_array_binding; - -public: - bool errors{false}; - - // This sets most values to generated or arbitrary values, which the - // halide calls would be unlikely to accidentally use. But for boolean - // values, we want to be sure that halide is really restoring the - // initial value, not just setting it to true or false. So we need to - // be able to try both. 
- void setup(bool boolval) { - // parse the OpenGL version - const char *version = (const char *)glGetString(GL_VERSION); - parse_opengl_version(version, &gl_major_version, &gl_minor_version); - - glGenTextures(ntextures, initial_bound_textures); - for (int i = 0; i < ntextures; i++) { - glActiveTexture(GL_TEXTURE0 + i); - glBindTexture(GL_TEXTURE_2D, initial_bound_textures[i]); - } - glActiveTexture(initial_active_texture = GL_TEXTURE3); - - // Vertex array objects are only used by Halide if the OpenGL version >=3 - if (gl_major_version >= 3) { - glBindVertexArray(initial_vertex_array_binding = gl_gen(glGenVertexArrays)); - } - - for (int i = 0; i < nvertex_attribs; i++) { - if ((initial_vertex_attrib_array_enabled[i] = boolval)) { - glEnableVertexAttribArray(i); - } else { - glDisableVertexAttribArray(i); - } - char buf[256]; - sprintf(buf, "vertex attrib array %d state", i); - check_error(buf); - } - - glUseProgram(initial_current_program = Program::handle()); - glViewport(initial_viewport[0] = 111, initial_viewport[1] = 222, initial_viewport[2] = 333, initial_viewport[3] = 444); - gl_enable(GL_CULL_FACE, initial_cull_face = boolval); - gl_enable(GL_DEPTH_TEST, initial_depth_test = boolval); - glBindBuffer(GL_ARRAY_BUFFER, initial_array_buffer_binding = gl_gen(glGenBuffers)); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, initial_element_array_buffer_binding = gl_gen(glGenBuffers)); - glBindFramebuffer(GL_FRAMEBUFFER, initial_framebuffer_binding = gl_gen(glGenFramebuffers)); - - check_error("known state"); - } - - void check(const char *operation) { - check_value(operation, "ActiveTexture", GL_ACTIVE_TEXTURE, initial_active_texture); - check_value(operation, "current program", GL_CURRENT_PROGRAM, initial_current_program); - check_value(operation, "framebuffer binding", GL_FRAMEBUFFER_BINDING, initial_framebuffer_binding); - check_value(operation, "array buffer binding", GL_ARRAY_BUFFER_BINDING, initial_array_buffer_binding); - check_value(operation, "element array 
buffer binding", GL_ELEMENT_ARRAY_BUFFER_BINDING, initial_element_array_buffer_binding); - check_value(operation, "viewport", GL_VIEWPORT, initial_viewport); - check_value(operation, "GL_CULL_FACE", GL_CULL_FACE, initial_cull_face); - check_value(operation, "GL_DEPTH_TEST", GL_DEPTH_TEST, initial_cull_face); - - // Vertex array objects are only used by Halide if the OpenGL version >=3 - if (gl_major_version >= 3) { - check_value(operation, "vertex array binding", GL_VERTEX_ARRAY_BINDING, initial_vertex_array_binding); - } else { - fprintf(stderr, "Skipping vertex array binding tests because OpenGL version is %d.%d (<3.0)\n", gl_major_version, gl_minor_version); - } - - for (int i = 0; i < ntextures; i++) { - char buf[100]; - sprintf(buf, "bound texture (unit %d)", i); - glActiveTexture(GL_TEXTURE0 + i); - check_value(operation, buf, GL_TEXTURE_BINDING_2D, initial_bound_textures[i]); - } - - for (int i = 0; i < nvertex_attribs; i++) { - int initial = initial_vertex_attrib_array_enabled[i]; - GLint val; - glGetVertexAttribiv(i, GL_VERTEX_ATTRIB_ARRAY_ENABLED, &val); - if (val != initial) { - fprintf(stderr, "%s did not restore boolean VertexAttributeArrayEnabled(%d): initial value was %s, current value is %s\n", operation, i, initial ? "true" : "false", val ? "true" : "false"); - errors = true; - } - } - } -}; - -using namespace Halide; - -int main() { - // This test must be run with an OpenGL target. 
- const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - KnownState known_state; - - Buffer input(255, 10, 3); - Buffer out(UInt(8), 255, 10, 3); - - Var x, y, c; - Func g; - g(x, y, c) = input(x, y, c); - g.bound(c, 0, 3); - g.glsl(x, y, c); - g.realize(out, target); // let Halide initialize OpenGL - - known_state.setup(true); - g.realize(out, target); - known_state.check("realize"); - - known_state.setup(true); - out.copy_to_host(); - known_state.check("copy_to_host"); - - known_state.setup(false); - g.realize(out, target); - known_state.check("realize"); - - known_state.setup(false); - out.copy_to_host(); - known_state.check("copy_to_host"); - - if (known_state.errors) { - return 1; - } - - printf("Success!\n"); - return 0; -} - -#endif diff --git a/test/opengl/select.cpp b/test/opengl/select.cpp deleted file mode 100644 index f4c358b43e70..000000000000 --- a/test/opengl/select.cpp +++ /dev/null @@ -1,214 +0,0 @@ -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -int test_per_channel_select() { - - printf("Testing select of channel.\n"); - - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Func gpu("gpu"), cpu("cpu"); - Var x("x"), y("y"), c("c"); - - gpu(x, y, c) = cast(mux(c, {128, x, y, x * y})); - gpu.bound(c, 0, 4); - gpu.glsl(x, y, c); - gpu.compute_root(); - - cpu(x, y, c) = gpu(x, y, c); - - Buffer out(10, 10, 4); - cpu.realize(out, target); - - // Verify the result - if (!Testing::check_result(out, [&](int x, int y, int c) { - switch (c) { - case 0: return 128; - case 1: return x; - case 2: return y; - default: return x*y; - } })) { - return 1; - } - - return 0; -} - -int test_flag_scalar_select() { - - printf("Testing select of scalar value with flag.\n"); - - // This test must be run with an OpenGL target. 
- const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Func gpu("gpu"), cpu("cpu"); - Var x("x"), y("y"), c("c"); - - int flag_value = 0; - - Param flag("flag"); - flag.set(flag_value); - - gpu(x, y, c) = cast(select(flag != 0, 128, - 255)); - gpu.bound(c, 0, 4); - gpu.glsl(x, y, c); - gpu.compute_root(); - - // This should trigger a copy_to_host operation - cpu(x, y, c) = gpu(x, y, c); - - Buffer out(10, 10, 4); - cpu.realize(out, target); - - // Verify the result - if (!Testing::check_result(out, [&](int x, int y, int c) { - return !flag_value ? 255 : 128; - })) { - return 1; - } - - return 0; -} - -int test_flag_pixel_select() { - - printf("Testing select of pixel value with flag.\n"); - - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Func gpu("gpu"), cpu("cpu"); - Var x("x"), y("y"), c("c"); - - int flag_value = 0; - - Param flag("flag"); - flag.set(flag_value); - - Buffer image(10, 10, 4); - for (int y = 0; y < image.height(); y++) { - for (int x = 0; x < image.width(); x++) { - for (int c = 0; c < image.channels(); c++) { - image(x, y, c) = 128; - } - } - } - - gpu(x, y, c) = cast(select(flag != 0, image(x, y, c), - 255)); - gpu.bound(c, 0, 4); - gpu.glsl(x, y, c); - gpu.compute_root(); - - // This should trigger a copy_to_host operation - cpu(x, y, c) = gpu(x, y, c); - - Buffer out(10, 10, 4); - cpu.realize(out, target); - - // Verify the result - if (!Testing::check_result(out, [&](int x, int y, int c) { - return !flag_value ? 255 : 128; - })) { - return 1; - } - - return 0; -} - -int test_nested_select() { - - printf("Testing nested select.\n"); - - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - // Define the algorithm. 
- Var x("x"), y("y"), c("c"); - Func f("f"); - Expr temp = cast(select(x == 0, 1, 2)); - f(x, y, c) = select(y == 0, temp, 255 - temp); - - // Schedule f to run on the GPU. - const int channels = 3; - f.bound(c, 0, channels).glsl(x, y, c); - - // Generate the result. - const int width = 10, height = 10; - Buffer out = f.realize(width, height, channels, target); - - // Check the result. - int errors = 0; - out.for_each_element([&](int x, int y, int c) { - uint8_t temp = x == 0 ? 1 : 2; - uint8_t expected = y == 0 ? temp : 255 - temp; - uint8_t actual = out(x, y, c); - if (expected != actual && ++errors == 1) { - fprintf(stderr, "out(%d, %d, %d) = %d instead of %d\n", - x, y, c, actual, expected); - } - }); - - return errors; -} - -int test_nested_select_varying() { - - printf("Testing nested select with varying condition.\n"); - - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - // Define the algorithm. - Var x("x"), y("y"), c("c"); - Func f("f"); - Expr temp = cast(select(x - c > 0, 1, 2)); - f(x, y, c) = select(y == 0, temp, 255 - temp); - - // Schedule f to run on the GPU. - const int channels = 3; - f.bound(c, 0, channels).glsl(x, y, c); - - // Generate the result. - const int width = 10, height = 10; - Buffer out = f.realize(width, height, channels, target); - - // Check the result. - int errors = 0; - out.for_each_element([&](int x, int y, int c) { - uint8_t temp = x - c > 0 ? 1 : 2; - uint8_t expected = y == 0 ? 
temp : 255 - temp; - uint8_t actual = out(x, y, c); - if (expected != actual && ++errors == 1) { - fprintf(stderr, "out(%d, %d, %d) = %d instead of %d\n", - x, y, c, actual, expected); - } - }); - - return errors; -} - -int main() { - - int err = 0; - - err |= test_per_channel_select(); - err |= test_flag_scalar_select(); - err |= test_flag_pixel_select(); - err |= test_nested_select(); - err |= test_nested_select_varying(); - - if (err) { - printf("FAILED\n"); - return 1; - } - - printf("Success!\n"); - return 0; -} diff --git a/test/opengl/set_pixels.cpp b/test/opengl/set_pixels.cpp deleted file mode 100644 index 7c282878af0b..000000000000 --- a/test/opengl/set_pixels.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -int main() { - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Func f; - Var x, y, c; - - f(x, y, c) = cast(42); - - Buffer out(10, 10, 3); - f.bound(c, 0, 3).glsl(x, y, c); - f.realize(out, target); - - out.copy_to_host(); - if (!Testing::check_result(out, [](int x, int y, int c) { return 42; })) { - return 1; - } - - printf("Success!\n"); - return 0; -} diff --git a/test/opengl/shifted_domains.cpp b/test/opengl/shifted_domains.cpp deleted file mode 100644 index 38e2e81b2771..000000000000 --- a/test/opengl/shifted_domains.cpp +++ /dev/null @@ -1,66 +0,0 @@ -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -// This test executes a simple kernel with a non-zero min value. The code is -// adapted from lesson_06_realizing_over_shifted_domains.cpp and scheduled for -// GLSL -int shifted_domains() { - - // This test must be run with an OpenGL target. 
- const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - int errors = 0; - - Func gradient("gradient"); - Var x("x"), y("y"), c("c"); - gradient(x, y, c) = cast(x + y); - - gradient.bound(c, 0, 1); - gradient.glsl(x, y, c); - - printf("Evaluating gradient from (0, 0) to (7, 7)\n"); - Buffer result(8, 8, 1); - gradient.realize(result, target); - result.copy_to_host(); - - if (!Testing::check_result(result, 5e-5f, [](int x, int y) { return float(x + y); })) - errors++; - - Buffer shifted(5, 7, 1); - shifted.set_min(100, 50); - - printf("Evaluating gradient from (100, 50) to (104, 56)\n"); - - gradient.realize(shifted, target); - shifted.copy_to_host(); - - if (!Testing::check_result(shifted, 5e-5f, [](int x, int y) { return float(x + y); })) - errors++; - - // Test with a negative min - shifted.set_min(-100, -50); - - printf("Evaluating gradient from (-100, -50) to (-96, -44)\n"); - - gradient.realize(shifted, target); - shifted.copy_to_host(); - - if (!Testing::check_result(shifted, 5e-5f, [](int x, int y) { return float(x + y); })) - errors++; - - return errors; -} - -int main() { - - if (shifted_domains() != 0) { - return 1; - } - - printf("Success!\n"); - return 0; -} diff --git a/test/opengl/special_funcs.cpp b/test/opengl/special_funcs.cpp deleted file mode 100644 index 677bf05a23c0..000000000000 --- a/test/opengl/special_funcs.cpp +++ /dev/null @@ -1,150 +0,0 @@ -#include "Halide.h" -#include -#include -#include -#include - -using namespace Halide; - -Var x, y, c; - -double square(double x) { - return x * x; -} - -template -void test_function(Expr e, Buffer &cpu_result, Buffer &gpu_result) { - Func cpu("cpu"), gpu("gpu"); - - Target cpu_target = get_host_target(); - Target gpu_target = get_host_target().with_feature(Target::OpenGL); - cpu(x, y, c) = e; - gpu(x, y, c) = e; - - cpu.realize(cpu_result, cpu_target); - - gpu.bound(c, 0, 3).glsl(x, y, c); - gpu.realize(gpu_result, gpu_target); - gpu_result.copy_to_host(); -} - 
-template -bool test_exact(Expr r, Expr g, Expr b) { - Expr e = cast(mux(c, {r, g, b})); - const int W = 256, H = 256; - Buffer cpu_result(W, H, 3); - Buffer gpu_result(W, H, 3); - test_function(e, cpu_result, gpu_result); - - for (int y = 0; y < gpu_result.height(); y++) { - for (int x = 0; x < gpu_result.width(); x++) { - if (!(gpu_result(x, y, 0) == cpu_result(x, y, 0) && - gpu_result(x, y, 1) == cpu_result(x, y, 1) && - gpu_result(x, y, 2) == cpu_result(x, y, 2))) { - std::cerr << "Incorrect pixel for " << e << " at (" << x << ", " << y << ")\n" - << " (" - << (int)gpu_result(x, y, 0) << ", " - << (int)gpu_result(x, y, 1) << ", " - << (int)gpu_result(x, y, 2) << ") != (" - << (int)cpu_result(x, y, 0) << ", " - << (int)cpu_result(x, y, 1) << ", " - << (int)cpu_result(x, y, 2) - << ")\n"; - return false; - } - } - } - return true; -} - -template -bool test_approx(Expr r, Expr g, Expr b, double rms_error) { - Expr e = cast(mux(c, {r, g, b})); - const int W = 256, H = 256; - Buffer cpu_result(W, H, 3); - Buffer gpu_result(W, H, 3); - test_function(e, cpu_result, gpu_result); - - double err = 0.0; - for (int y = 0; y < gpu_result.height(); y++) { - for (int x = 0; x < gpu_result.width(); x++) { - err += square(gpu_result(x, y, 0) - cpu_result(x, y, 0)); - err += square(gpu_result(x, y, 1) - cpu_result(x, y, 1)); - err += square(gpu_result(x, y, 2) - cpu_result(x, y, 2)); - } - } - err = sqrt(err / (W * H)); - if (err > rms_error) { - std::cerr << "RMS error too large for " << e << ": " - << err << " > " << rms_error << "\n"; - return false; - } else { - return true; - } -} - -int main() { - - int errors = 0; - - if (!test_exact(0, 0, 0)) { - printf("Failed constant value test\n"); - errors++; - } - if (!test_exact(clamp(x + y, 0, 255), 0, 0)) { - printf("Failed clamp test\n"); - errors++; - } - - if (!test_exact( - max(x, y), - cast(min(cast(x), cast(y))), - clamp(x, 0, 10))) { - printf("Failed min/max test\n"); - errors++; - } - - if (!test_exact(trunc(x + 0.25f), 
trunc(-(x + 0.75f)), 0.0f)) { - printf("Failed trunc test\n"); - errors++; - } - - // Trigonometric functions in GLSL are fast but not very accurate, - // especially outside of 0..2pi. - // The GLSL ES 1.0 spec does not define the precision of these operations - // so a wide error bound is used in this test. - Expr r = (256 * x + y) / ceilf(65536.f / (2 * 3.1415926536f)); - if (!test_approx(sin(r), cos(r), 0.0f, 5e-2)) { - errors++; - printf("Failed trigonometric test\n"); - } - - // TODO: the test must account for differences in default rounding behavior - // between the CPU and GPU for float <-> integer conversions. In this case - // the operation is performed in float in the GLSL shader, and then - // converted back to a normalized integer value. - if (!test_approx( - (x - 127) / 3 + 127, - (x - 127) % 3 + 127, - 0, - 1)) { - printf("Failed integer operation test\n"); - errors++; - } - - if (!test_exact( - lerp(cast(x), cast(y), cast(128)), - lerp(cast(x), cast(y), 0.5f), - cast(lerp(cast(x), cast(y), 0.2f)))) { - printf("Failed lerp test\n"); - errors++; - } - - if (errors == 0) { - printf("Success!\n"); - return 0; - } else { - printf("FAILED %d tests\n", errors); - return 1; - } -} diff --git a/test/opengl/sum_reduction.cpp b/test/opengl/sum_reduction.cpp deleted file mode 100644 index 97fd40d5905c..000000000000 --- a/test/opengl/sum_reduction.cpp +++ /dev/null @@ -1,46 +0,0 @@ -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -int main() { - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - // Define the input - const int width = 10, height = 10, channels = 4; - Buffer input(width, height, channels); - input.fill([](int x, int y, int c) { - return float(x + y); - }); - - // Define the algorithm. 
- Var x, y, c; - RDom r(0, 5, "r"); - Func g; - Expr coordx = clamp(x + r, 0, input.width() - 1); - g(x, y, c) = cast(sum(input(coordx, y, c)) / sum(r) * 255.0f); - - // Schedule f and g to compute in separate passes on the GPU. - g.bound(c, 0, 4).glsl(x, y, c); - - // Generate the result. - Buffer result = g.realize(width, height, channels, target); - result.copy_to_host(); - - // Check the result. - if (!Testing::check_result(result, 1e-3f, [&](int x, int y, int c) { - float temp = 0.0f; - for (int r = 0; r < 5; r++) { - temp += input(std::min(x + r, input.width() - 1), y, c); - } - return temp / 10.0f * 255.0f; - })) { - return 1; - } - - printf("Success!\n"); - return 0; -} diff --git a/test/opengl/sumcolor_reduction.cpp b/test/opengl/sumcolor_reduction.cpp deleted file mode 100644 index 6532376061be..000000000000 --- a/test/opengl/sumcolor_reduction.cpp +++ /dev/null @@ -1,40 +0,0 @@ -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -int main() { - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - // Define the input. - const int width = 10, height = 10, channels = 3; - Buffer input(width, height, channels); - input.fill([](int x, int y, int c) { - return x + y; - }); - - // Define the algorithm. - Var x, y, c; - RDom r(0, 3, "r"); - Func g; - - g(x, y, c) = sum(input(x, y, r)); - - // Schedule f and g to compute in separate passes on the GPU. - g.bound(c, 0, 3).glsl(x, y, c); - - // Generate the result. - Buffer result = g.realize(10, 10, 3, target); - result.copy_to_host(); - - // Check the result. 
- if (!Testing::check_result(result, 1e-6f, [](int x, int y, int c) { return 3.0f * (x + y); })) { - return 1; - } - - printf("Success!\n"); - return 0; -} diff --git a/test/opengl/testing.h b/test/opengl/testing.h deleted file mode 100644 index 860ea55c172c..000000000000 --- a/test/opengl/testing.h +++ /dev/null @@ -1,86 +0,0 @@ -#ifndef _TESTING_H_ -#define _TESTING_H_ - -#include "Halide.h" -#include -#include -#include -#include - -namespace Testing { - -template -bool neq(T a, T b, T tol) { - return std::abs(a - b) > tol; -} - -// Check 3-dimension buffer -template -auto check_result(const Halide::Buffer &buf, T tol, F f) -> decltype(std::declval()(0, 0, 0), bool()) { - class err : std::exception { - public: - static void vector(const std::vector &v) { - for (size_t i = 0; i < v.size(); i++) { - if (i > 0) { - std::cerr << ","; - } - std::cerr << +v[i]; // use unary + to promote uint8_t from char to numeric - } - } - }; - try { - buf.for_each_element([&](int x, int y) { - std::vector expected; - std::vector result; - for (int c = 0; c < buf.channels(); c++) { - expected.push_back(f(x, y, c)); - result.push_back(buf(x, y, c)); - } - for (int c = 0; c < buf.channels(); c++) { - if (neq(result[c], expected[c], tol)) { - std::cerr << "Error: result ("; - err::vector(result); - std::cerr << ") should be ("; - err::vector(expected); - std::cerr << ") at x=" << x << " y=" << y << "\n"; - throw err(); - } - } - }); - } catch (err &) { - return false; - } - return true; -} - -// Check 2-dimension buffer -template -auto check_result(const Halide::Buffer &buf, T tol, F f) -> decltype(std::declval()(0, 0), bool()) { - class err : std::exception {}; - try { - buf.for_each_element([&](int x, int y) { - const T expected = f(x, y); - const T result = buf(x, y); - if (neq(result, expected, tol)) { - std::cerr << "Error: result ("; - std::cerr << +result; - std::cerr << ") should be ("; - std::cerr << +expected; - std::cerr << ") at x=" << x << " y=" << y << "\n"; - throw 
err(); - } - }); - } catch (err &) { - return false; - } - return true; -} - -// Shorthand to check with tolerance=0 -template -bool check_result(const Halide::Buffer &buf, Func f) { - return check_result(buf, 0, f); -} -} // namespace Testing - -#endif // _TESTING_H_ diff --git a/test/opengl/tuples.cpp b/test/opengl/tuples.cpp deleted file mode 100644 index b4a834ffd1ca..000000000000 --- a/test/opengl/tuples.cpp +++ /dev/null @@ -1,40 +0,0 @@ -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; - -int main() { - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Buffer input(255, 10, 3); - input.fill([](int x, int y, int c) { - return 10 * x + y + c; - }); - - Var x, y, c; - Func g; - g(x, y, c) = {input(x, y, c), input(x, y, c) / 2}; - - // h will be an opengl stage with tuple input. Tuple outputs - // aren't supported because OpenGL ES 2.0 doesn't support multiple - // output textures. 
- Func h; - h(x, y, c) = min(g(x, y, c)[0], g(x, y, c)[1]); - - Buffer out(255, 10, 3); - g.compute_root(); - h.compute_root().bound(c, 0, 3).glsl(x, y, c); - - h.realize(out, target); - out.copy_to_host(); - - if (!Testing::check_result(out, [&](int x, int y, int c) { return input(x, y, c) / 2; })) { - return 1; - } - - printf("Success!\n"); - return 0; -} diff --git a/test/opengl/vagrant/.gitignore b/test/opengl/vagrant/.gitignore deleted file mode 100644 index 8000dd9db47c..000000000000 --- a/test/opengl/vagrant/.gitignore +++ /dev/null @@ -1 +0,0 @@ -.vagrant diff --git a/test/opengl/vagrant/README.md b/test/opengl/vagrant/README.md deleted file mode 100644 index febf3be7d1ad..000000000000 --- a/test/opengl/vagrant/README.md +++ /dev/null @@ -1,136 +0,0 @@ -# Testing OpenGL on Ubuntu 14 & 16 using vagrant & VirtualBox - -## Overview - -This subdirectory (`Halide/test/opengl/vagrant`) provides the setup to build -Halide and run the OpenGL tests headlessly on Ubuntu 14.04 and/or 16.04, running -virtually under [vagrant](http://vagrantup.com) and -[VirtualBox](https://www.virtualbox.org). - -This is intended in particular for use by those who develop Halide's OpenGL -back-end on OS X and need to test on Linux. - -The `Vagrantfile` provisions with the necessary capabilities to build Halide and -build & run Halide's OpenGL test suite. In particular it installs llvm-3.8 and -OpenGL with software rendering to a dummy X server. - -## Quick instructions - -Presuming that you have [vagrant](http://vagrantup.com) and -[VirtualBox](https://www.virtualbox.org) installed, - -``` -$ cd Halide/test/opengl/vagrant -$ vagrant up [u14|u16] -[...] -$ vagrant ssh [u14|u16] -c "sh /vagrant/build_tests.sh" -[...] -``` - -The `[u14|u16]` argument is optional, the default is `u16` to use the Ubuntu -16.04 virtual machine. Specify `u14` to use the Ubuntu 14.04 macihne. 
- -After a bit of time and a lot of verbiage, you should eventually see the `make` -output for building and running the OpenGL tests - -## Detailed instructions - -### Starting and provisioning the virtual machine(s) - -As per above, you can start the machines using - -``` -$ cd Halide/test/opengl/vagrant -$ vagrant up [u14|u16] -[...] -``` - -The first time you run it for a given machine, it will download the necessary -base box, then boot and provision the machine. This will take several minutes. - -You may notice some errors or warnings in the output of `vagrant up`'s -provisioning; these can be safely ignored. (In particular for `u16` the output -ends with `ttyname failed: Inappropriate ioctl for device` which looks omnious -but is harmless.) - -As usual, you can stop or power down the machine using -`vagrant suspend [u14|u16]` or `vagrant halt [u14|u16]`; subsequently starting -it up again using `vagrant up [u14|u16]` should be reasonably quick. For more -info, see the `vagrant help` or the [vagrant](http://vagrantup.com) docs. - -See the `Vagrantfile` for the specific details of what gets provisioned. - -### Building Halide and running the tests - -The virtual machine has these directories live-shared with the host: - -- `/Halide` - The root of your Halide source tree -- `/vagrant` - The vagrant work directory. I.e. effectively a hard link to - `/Halide/test/opengl/vagrant` - -Because these are live shared, you can edit Halide source files on your host -machine but build them on the virtual machine. - -The script `build_tests.sh`, run on the virtual machine, is just a quick -shorthand to minimize the amount of typing, letting you build and run everything -at once from the host via - -``` -$ vagrant ssh [u14|u16] -c "sh /vagrant/build_tests.sh" -``` - -But of course for more focused development & debugging you might want to do -things one step at a time: - -``` -$ vagrant ssh [u14|u16] -[...Ubuntu motd...] 
-vagrant@vagrant:~$ -``` - -These are the steps taken by `build_tests.sh`: - -#### 1. Create an out-of-tree build directory - -``` -vagrant@vagrant:~$ mkdir ~/halide_build -vagrant@vagrant:~$ cd ~/halide_build -vagrant@vagrant:~/halide_build$ ln -s /Halide/Makefile . -``` - -It's important to build out-of-tree, because `/Halide` tree is live shared and -we don't want the virtual machine's object files to clobber the host object -files! - -#### 2. Build Halide - -Nothing special here, just build normally, e.g.: - -``` -vagrant@vagrant:~/halide_build$ make -j 3 -``` - -The machine is provisioned with environment variables `LLVM_CONFIG` globally set -appropriately. - -#### 3. Build & run the OpenGL tests - -Again nothing special here, just build the opengl tests normally, e.g.: - -``` -vagrant@vagrant:~/halide_build$ make -k test_opengl -``` - -Or of course you can build and run just one test, e.g.: - -``` -vagrant@vagrant:~/halide_build$ make opengl_float_texture -``` - -The machine is provisioned with environment variables `HL_TARGET` and -`HL_JIT_TARGET` set to `host-opengl`. You can of course override in your shell, -e.g. if you want to use `host-opengl-debug`. - -The machine is provisioned with `lldb` installed in case you need to do some -debugging. Aside from that it's bare-bones; if you need anything else for your -debugging or development you will need to `apt-get install` it. diff --git a/test/opengl/vagrant/Vagrantfile b/test/opengl/vagrant/Vagrantfile deleted file mode 100644 index 5d7fef1a0afe..000000000000 --- a/test/opengl/vagrant/Vagrantfile +++ /dev/null @@ -1,118 +0,0 @@ -# -*- mode: ruby -*- -# vi: set ft=ruby : - -# All Vagrant configuration is done below. The "2" in Vagrant.configure -# configures the configuration version (we support older styles for -# backwards compatibility). Please don't change it unless you know what -# you're doing. 
-Vagrant.configure("2") do |config| - # The most common configuration options are documented and commented below. - # For a complete reference, please see the online documentation at - # https://docs.vagrantup.com. - - # Every Vagrant development environment requires a box. You can search for - # boxes at https://atlas.hashicorp.com/search. - config.vm.define "u14", autostart: false do |u14| - u14.vm.box = "bento/ubuntu-14.04" - u14.vm.provision "shell", inline: <<-SHELL - # Create and start headless X service using upstart - cp /vagrant/provision/etc/init/xdummy.conf /etc/init/ - service xdummy start - SHELL - end - config.vm.define "u16", primary: true do |u16| - u16.vm.box = "bento/ubuntu-16.04" - u16.vm.provision "shell", inline: <<-SHELL - # Create and start headless X service using systemd - cp /vagrant/provision/etc/systemd/system/xdummy.service /etc/systemd/system/ - systemctl start xdummy - SHELL - end - - config.vm.boot_timeout = 600 - - - # Disable automatic box update checking. If you disable this, then - # boxes will only be checked for updates when the user runs - # `vagrant box outdated`. This is not recommended. - # config.vm.box_check_update = false - - # Create a forwarded port mapping which allows access to a specific port - # within the machine from a port on the host machine. In the example below, - # accessing "localhost:8080" will access port 80 on the guest machine. - # config.vm.network "forwarded_port", guest: 80, host: 8080 - - # Create a private network, which allows host-only access to the machine - # using a specific IP. - # config.vm.network "private_network", ip: "192.168.33.10" - - # Create a public network, which generally matched to bridged network. - # Bridged networks make the machine appear as another physical device on - # your network. - # config.vm.network "public_network" - - # Share an additional folder to the guest VM. The first argument is - # the path on the host to the actual folder. 
The second argument is - # the path on the guest to mount the folder. And the optional third - # argument is a set of non-required options. - # config.vm.synced_folder "../data", "/vagrant_data" - config.vm.synced_folder "../../..", "/Halide" - - # Provider-specific configuration so you can fine-tune various - # backing providers for Vagrant. These expose provider-specific options. - # Example for VirtualBox: - # - config.vm.provider "virtualbox" do |vb| - # Display the VirtualBox GUI when booting the machine - vb.gui = false - - # Customize the amount of memory on the VM: - vb.memory = "2048" - end - # - # View the documentation for the provider you are using for more - # information on available options. - - # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies - # such as FTP and Heroku are also available. See the documentation at - # https://docs.vagrantup.com/v2/push/atlas.html for more information. - # config.push.define "atlas" do |push| - # push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME" - # end - - # Enable provisioning with a shell script. Additional provisioners such as - # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the - # documentation for more information about their specific syntax and use. 
- config.vm.provision "shell", inline: <<-SHELL - - # Global environment variables, both for system purposes (PATH, LC_ALL) - # and for convenience of building and running Halide opengl tests - # (DISPLAY, LLVM_CONFIG, CLANG, HL_JIT_TARGET) - cp /vagrant/provision/etc/environment /etc/environment - - apt-get update - - # Install resources for headless X service (final provisioning of the service is machine-specific) - apt-get install -y xserver-xorg-video-dummy - cp /vagrant/provision/usr/share/X11/xorg.conf.d/xdummy.conf /usr/share/X11/xorg.conf.d/xdummy.conf - - # Install llvm-3.8 as llvm - apt-get install -y llvm-3.8 llvm-3.8-dev clang-3.8 lldb-3.8 - for ll in /usr/bin/*-3.8 ; do ln -s -f $ll `echo $ll | sed -e s/-3.8//` ; done - - # Build OpenGL (mesa) using software driver (gallium / llvmpipe). Can't - # use the prebuilt mesa packages because they expect video hardware drivers. - apt-get install -y build-essential scons python-mako flex bison zlib1g-dev libudev-dev pkg-config libx11-dev libxext-dev libxdamage-dev x11proto-gl-dev libx11-xcb-dev - cd /usr/local/src - test -f mesa-12.0.2.tar.xz || wget -q https://mesa.freedesktop.org/archive/12.0.2/mesa-12.0.2.tar.xz - test -d mesa-12.0.2 || tar xkf mesa-12.0.2.tar.xz - cd mesa-12.0.2 - scons build=release texture_float=yes libgl-xlib - ln -s -f `pwd`/include/GL* /usr/local/include/ - cp `pwd`/build/linux-x86_64/gallium/targets/libgl-xlib/libGL.* /usr/local/lib - ldconfig - - # Machine-specific provisioning will happpen next - SHELL - -end diff --git a/test/opengl/vagrant/build_tests.sh b/test/opengl/vagrant/build_tests.sh deleted file mode 100755 index 54dec279c28a..000000000000 --- a/test/opengl/vagrant/build_tests.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -x -mkdir -p ~/halide_build -cd ~/halide_build -ln -s -f /Halide/Makefile . 
-make -j 3 -make -k test_opengl diff --git a/test/opengl/vagrant/provision/etc/environment b/test/opengl/vagrant/provision/etc/environment deleted file mode 100644 index 2ab25818fbad..000000000000 --- a/test/opengl/vagrant/provision/etc/environment +++ /dev/null @@ -1,7 +0,0 @@ -PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games" -LC_ALL=C -DISPLAY=:0.0 -LLVM_CONFIG=/usr/bin/llvm-config-3.8 -CLANG=/usr/bin/clang-3.8 -HL_TARGET=host-opengl -HL_JIT_TARGET=host-opengl diff --git a/test/opengl/vagrant/provision/etc/init/xdummy.conf b/test/opengl/vagrant/provision/etc/init/xdummy.conf deleted file mode 100644 index da5925809f41..000000000000 --- a/test/opengl/vagrant/provision/etc/init/xdummy.conf +++ /dev/null @@ -1,7 +0,0 @@ -description "Dummy X server providing DISPLAY=:0.0" - -expect fork - -script - /usr/bin/Xorg -noreset +extension GLX +extension RANDR +extension RENDER -logfile /var/log/Xorg.log :0 & -end script diff --git a/test/opengl/vagrant/provision/etc/systemd/system/xdummy.service b/test/opengl/vagrant/provision/etc/systemd/system/xdummy.service deleted file mode 100644 index 8d0ce1a3c4d9..000000000000 --- a/test/opengl/vagrant/provision/etc/systemd/system/xdummy.service +++ /dev/null @@ -1,6 +0,0 @@ -[Unit] -Description=Dummy X server providing DISPLAY=:0.0" - -[Service] -Type=simple -ExecStart=/usr/bin/Xorg -noreset +extension GLX +extension RANDR +extension RENDER -config /dev/null -logfile /var/log/Xorg.log :0 diff --git a/test/opengl/vagrant/provision/usr/share/X11/xorg.conf.d/xdummy.conf b/test/opengl/vagrant/provision/usr/share/X11/xorg.conf.d/xdummy.conf deleted file mode 100644 index d31d944c32f3..000000000000 --- a/test/opengl/vagrant/provision/usr/share/X11/xorg.conf.d/xdummy.conf +++ /dev/null @@ -1,137 +0,0 @@ -# This xorg configuration file is meant to be used by xpra -# to start a dummy X11 server. 
-# For details, please see: -# https://xpra.org/Xdummy.html - -Section "ServerFlags" - Option "DontVTSwitch" "true" - Option "AllowMouseOpenFail" "true" - Option "PciForceNone" "true" - Option "AutoEnableDevices" "false" - Option "AutoAddDevices" "false" -EndSection - -Section "InputDevice" - Identifier "dummy_mouse" - Option "CorePointer" "true" - Driver "void" -EndSection - -Section "InputDevice" - Identifier "dummy_keyboard" - Option "CoreKeyboard" "true" - Driver "void" -EndSection - -Section "Device" - Identifier "dummy_videocard" - Driver "dummy" - Option "ConstantDPI" "true" - #VideoRam 4096000 - #VideoRam 256000 - VideoRam 192000 -EndSection - -Section "Monitor" - Identifier "dummy_monitor" - HorizSync 5.0 - 1000.0 - VertRefresh 5.0 - 200.0 - #This can be used to get a specific DPI, but only for the default resolution: - #DisplaySize 508 317 - #NOTE: the highest modes will not work without increasing the VideoRam - # for the dummy video card. - Modeline "32768x32768" 15226.50 32768 35800 39488 46208 32768 32771 32781 32953 - Modeline "32768x16384" 7516.25 32768 35544 39192 45616 16384 16387 16397 16478 - Modeline "16384x8192" 2101.93 16384 16416 24400 24432 8192 8390 8403 8602 - Modeline "8192x4096" 424.46 8192 8224 9832 9864 4096 4195 4202 4301 - Modeline "5496x1200" 199.13 5496 5528 6280 6312 1200 1228 1233 1261 - Modeline "5280x1080" 169.96 5280 5312 5952 5984 1080 1105 1110 1135 - Modeline "5280x1200" 191.40 5280 5312 6032 6064 1200 1228 1233 1261 - Modeline "5120x3200" 199.75 5120 5152 5904 5936 3200 3277 3283 3361 - Modeline "4800x1200" 64.42 4800 4832 5072 5104 1200 1229 1231 1261 - Modeline "3840x2880" 133.43 3840 3872 4376 4408 2880 2950 2955 3025 - Modeline "3840x2560" 116.93 3840 3872 4312 4344 2560 2622 2627 2689 - Modeline "3840x2048" 91.45 3840 3872 4216 4248 2048 2097 2101 2151 - Modeline "3840x1080" 100.38 3840 3848 4216 4592 1080 1081 1084 1093 - Modeline "3600x1200" 106.06 3600 3632 3984 4368 1200 1201 1204 1214 - Modeline "3288x1080" 
39.76 3288 3320 3464 3496 1080 1106 1108 1135 - Modeline "2048x2048" 49.47 2048 2080 2264 2296 2048 2097 2101 2151 - Modeline "2048x1536" 80.06 2048 2104 2312 2576 1536 1537 1540 1554 - Modeline "2560x1600" 47.12 2560 2592 2768 2800 1600 1639 1642 1681 - Modeline "2560x1440" 42.12 2560 2592 2752 2784 1440 1475 1478 1513 - Modeline "1920x1440" 69.47 1920 1960 2152 2384 1440 1441 1444 1457 - Modeline "1920x1200" 26.28 1920 1952 2048 2080 1200 1229 1231 1261 - Modeline "1920x1080" 23.53 1920 1952 2040 2072 1080 1106 1108 1135 - Modeline "1680x1050" 20.08 1680 1712 1784 1816 1050 1075 1077 1103 - Modeline "1600x1200" 22.04 1600 1632 1712 1744 1200 1229 1231 1261 - Modeline "1600x900" 33.92 1600 1632 1760 1792 900 921 924 946 - Modeline "1440x900" 30.66 1440 1472 1584 1616 900 921 924 946 - ModeLine "1366x768" 72.00 1366 1414 1446 1494 768 771 777 803 - Modeline "1280x1024" 31.50 1280 1312 1424 1456 1024 1048 1052 1076 - Modeline "1280x800" 24.15 1280 1312 1400 1432 800 819 822 841 - Modeline "1280x768" 23.11 1280 1312 1392 1424 768 786 789 807 - Modeline "1360x768" 24.49 1360 1392 1480 1512 768 786 789 807 - Modeline "1024x768" 18.71 1024 1056 1120 1152 768 786 789 807 - Modeline "768x1024" 19.50 768 800 872 904 1024 1048 1052 1076 - - - #common resolutions for android devices (both orientations): - Modeline "800x1280" 25.89 800 832 928 960 1280 1310 1315 1345 - Modeline "1280x800" 24.15 1280 1312 1400 1432 800 819 822 841 - Modeline "720x1280" 30.22 720 752 864 896 1280 1309 1315 1345 - Modeline "1280x720" 27.41 1280 1312 1416 1448 720 737 740 757 - Modeline "768x1024" 24.93 768 800 888 920 1024 1047 1052 1076 - Modeline "1024x768" 23.77 1024 1056 1144 1176 768 785 789 807 - Modeline "600x1024" 19.90 600 632 704 736 1024 1047 1052 1076 - Modeline "1024x600" 18.26 1024 1056 1120 1152 600 614 617 631 - Modeline "536x960" 16.74 536 568 624 656 960 982 986 1009 - Modeline "960x536" 15.23 960 992 1048 1080 536 548 551 563 - Modeline "600x800" 15.17 600 632 688 720 800 818 
822 841 - Modeline "800x600" 14.50 800 832 880 912 600 614 617 631 - Modeline "480x854" 13.34 480 512 560 592 854 873 877 897 - Modeline "848x480" 12.09 848 880 920 952 480 491 493 505 - Modeline "480x800" 12.43 480 512 552 584 800 818 822 841 - Modeline "800x480" 11.46 800 832 872 904 480 491 493 505 - #resolutions for android devices (both orientations) - #minus the status bar - #38px status bar (and width rounded up) - Modeline "800x1242" 25.03 800 832 920 952 1242 1271 1275 1305 - Modeline "1280x762" 22.93 1280 1312 1392 1424 762 780 783 801 - Modeline "720x1242" 29.20 720 752 856 888 1242 1271 1276 1305 - Modeline "1280x682" 25.85 1280 1312 1408 1440 682 698 701 717 - Modeline "768x986" 23.90 768 800 888 920 986 1009 1013 1036 - Modeline "1024x730" 22.50 1024 1056 1136 1168 730 747 750 767 - Modeline "600x986" 19.07 600 632 704 736 986 1009 1013 1036 - Modeline "1024x562" 17.03 1024 1056 1120 1152 562 575 578 591 - Modeline "536x922" 16.01 536 568 624 656 922 943 947 969 - Modeline "960x498" 14.09 960 992 1040 1072 498 509 511 523 - Modeline "600x762" 14.39 600 632 680 712 762 779 783 801 - Modeline "800x562" 13.52 800 832 880 912 562 575 578 591 - Modeline "480x810" 12.59 480 512 552 584 810 828 832 851 - Modeline "848x442" 11.09 848 880 920 952 442 452 454 465 - Modeline "480x762" 11.79 480 512 552 584 762 779 783 801 -EndSection - -Section "Screen" - Identifier "dummy_screen" - Device "dummy_videocard" - Monitor "dummy_monitor" - DefaultDepth 24 - SubSection "Display" - Viewport 0 0 - Depth 24 - #Modes "32768x32768" "32768x16384" "16384x8192" "8192x4096" "5120x3200" "3840x2880" "3840x2560" "3840x2048" "2048x2048" "2560x1600" "1920x1440" "1920x1200" "1920x1080" "1600x1200" "1680x1050" "1600x900" "1400x1050" "1440x900" "1280x1024" "1366x768" "1280x800" "1024x768" "1024x600" "800x600" "320x200" - Modes "5120x3200" "3840x2880" "3840x2560" "3840x2048" "2048x2048" "2560x1600" "1920x1440" "1920x1200" "1920x1080" "1600x1200" "1680x1050" "1600x900" "1400x1050" 
"1440x900" "1280x1024" "1366x768" "1280x800" "1024x768" "1024x600" "800x600" "320x200" - #Virtual 32000 32000 - #Virtual 16384 8192 - Virtual 8192 4096 - #Virtual 5120 3200 - EndSubSection -EndSection - -Section "ServerLayout" - Identifier "dummy_layout" - Screen "dummy_screen" - InputDevice "dummy_mouse" - InputDevice "dummy_keyboard" -EndSection diff --git a/test/opengl/varying.cpp b/test/opengl/varying.cpp deleted file mode 100644 index 314136215c94..000000000000 --- a/test/opengl/varying.cpp +++ /dev/null @@ -1,218 +0,0 @@ -#include "Halide.h" -#include - -#include "testing.h" - -using namespace Halide; -using namespace Halide::Internal; - -// This test exercises several use cases for the GLSL varying attributes -// feature. This feature detects expressions that are linear in terms of the -// loop variables of a .glsl(..) scheduled Func and uses graphics pipeline -// interpolation to evaluate the expressions instead of evaluating them per -// fragment in the Halide generated fragment shader. Common examples are texture -// coordinates interpolated across a Func domain or texture coordinates -// transformed by a matrix and interpolated across the domain. Both cases arise -// when GLSL shaders are ported to Halide. - -// This is a mutator that injects code that counts the number of variables -// tagged .varying -#ifdef _MSC_VER -#define DLLEXPORT __declspec(dllexport) -#else -#define DLLEXPORT -#endif - -// This global variable is used to count the number of unique varying attribute -// variables that appear in the lowered Halide IR. -std::set varyings; - -// This function is a HalideExtern used to add variables to the set. The tests -// below check the total number of unique variables found--not the specific -// names of the variables which are arbitrary. 
-extern "C" DLLEXPORT const Variable *record_varying(const Variable *op) { - if (varyings.find(op->name) == varyings.end()) { - fprintf(stderr, "Found varying attribute: %s\n", op->name.c_str()); - varyings.insert(op->name); - } - return op; -} -HalideExtern_1(const Variable *, record_varying, const Variable *); - -// This visitor inserts the above function in the IR tree. -class CountVarying : public IRMutator { - using IRMutator::visit; - - Expr visit(const Variable *op) override { - Expr expr = IRMutator::visit(op); - if (ends_with(op->name, ".varying")) { - expr = record_varying(op); - } - return expr; - } -}; - -bool perform_test(const char *label, const Target target, Func f, int expected_nvarying, float tol, std::function expected_val) { - fprintf(stderr, "%s\n", label); - - Buffer out(8, 8, 3); - - varyings.clear(); - f.add_custom_lowering_pass(new CountVarying); - f.realize(out, target); - - // Check for the correct number of varying attributes - if ((int)varyings.size() != expected_nvarying) { - fprintf(stderr, - "%s: Error: wrong number of varying attributes: %d should be %d\n", - label, (int)varyings.size(), expected_nvarying); - return false; - } - - // Check for correct result values - out.copy_to_host(); - - if (!Testing::check_result(out, tol, expected_val)) { - return false; - } - - fprintf(stderr, "%s Passed!\n", label); - return true; -} - -// This is a simple test case where there are two expressions that are not -// linearly varying in terms of a loop variable and one expression that is. 
-bool test0(const Target target, Var &x, Var &y, Var &c) { - float p_value = 8.0f; - Param p("p"); - p.set(p_value); - - Func f0("f0"); - f0(x, y, c) = mux(c, {4.0f, // Constant term - p * 10.0f, // Linear expression not in terms of a loop parameter - cast(x) * 100.0f}); // Linear expression in terms of x - - f0.bound(c, 0, 3); - f0.glsl(x, y, c); - return perform_test("Test0", target, f0, 2, 0.0f, [&](int x, int y, int c) { - switch (c) { - case 0: return 4.0f; - case 1: return p_value * 10.0f; - default: return static_cast(x) * 100.0f; - } }); -} - -struct CoordXform { - const float th = 3.141592f / 8.0f; - const float s_th = sinf(th); - const float c_th = cosf(th); - const float m[6] = { - c_th, -s_th, 0.0f, - s_th, c_th, 0.0f}; - Param m0, m1, m2, m3, m4, m5; - CoordXform() - : m0("m0"), m1("m1"), m2("m2"), m3("m3"), m4("m4"), m5("m5") { - m0.set(m[0]); - m1.set(m[1]); - m2.set(m[2]); - m3.set(m[3]); - m4.set(m[4]); - m5.set(m[5]); - } -}; - -// This is a more complicated test case where several expressions are linear -// in all of the loop variables. This is the coordinate transformation case -bool test1(const Target target, Var &x, Var &y, Var &c) { - struct CoordXform m; - Func f1("f1"); - f1(x, y, c) = mux(c, {m.m0 * x + m.m1 * y + m.m2, - m.m3 * x + m.m4 * y + m.m5, - 1.0f}); - - f1.bound(c, 0, 3); - f1.glsl(x, y, c); - - return perform_test("Test1", target, f1, 4, 0.000001f, [&](int x, int y, int c) { - switch (c) { - case 0: return m.m[0] * x + m.m[1] * y + m.m[2]; - case 1: return m.m[3] * x + m.m[4] * y + m.m[5]; - default: return 1.0f; - } }); -} - -// The feature is supposed to find linearly varying sub-expressions as well -// so for example, if the above expressions are wrapped in a non-linear -// function like sqrt, they should still be extracted. 
-bool test2(const Target target, Var &x, Var &y, Var &c) { - struct CoordXform m; - Func f2("f2"); - f2(x, y, c) = mux(c, {sqrt(m.m0 * x + m.m1 * y + m.m2), - sqrt(m.m3 * x + m.m4 * y + m.m5), - 1.0f}); - f2.bound(c, 0, 3); - f2.glsl(x, y, c); - - return perform_test("Test2", target, f2, 4, 0.000001f, [&](int x, int y, int c) { - switch (c) { - case 0: return sqrtf(m.m[0] * x + m.m[1] * y + m.m[2]); - case 1: return sqrtf(m.m[3] * x + m.m[4] * y + m.m[5]); - default: return 1.0f; - } }); -} - -// This case tests a large expression linearly varying in terms of a loop -// variable -bool test3(const Target target, Var &x, Var &y, Var &c) { - float p_value = 8.0f; - Param p("p"); - p.set(p_value); - Expr foo = p; - for (int i = 0; i < 10; i++) { - foo = foo + foo + foo; - } - foo = x + foo; - - float foo_value = p_value; - for (int i = 0; i < 10; i++) { - foo_value = foo_value + foo_value + foo_value; - } - - Func f3("f3"); - f3(x, y, c) = mux(c, {foo, 1.0f, 2.0f}); - - f3.bound(c, 0, 3); - f3.glsl(x, y, c); - - return perform_test("Test3", target, f3, 2, 0.000001f, [&](int x, int y, int c) { - switch (c) { - case 0: return (float)x + foo_value; - case 1: return 1.0f; - default: return 2.0f; - } }); -} - -int main() { - // This test must be run with an OpenGL target. - const Target target = get_jit_target_from_environment().with_feature(Target::OpenGL); - - Var x("x"); - Var y("y"); - Var c("c"); - - bool pass = true; - pass &= test0(target, x, y, c); - pass &= test1(target, x, y, c); - pass &= test2(target, x, y, c); - pass &= test3(target, x, y, c); - if (!pass) { - return 1; - } - - // The test will return early on error. - fprintf(stderr, "Success!\n"); - - // This test may abort with the message "Failed to free device buffer" due - // to https://github.com/halide/Halide/issues/559 - return 0; -}