Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
d6f6053
Revert "Revert "Make context handling in GPU runtimes more consistent…
Dec 7, 2020
f8df8eb
Revert "Revert "Fix broken destroy_context() in gpu_multi_context_thr…
Dec 7, 2020
805e14b
Solve the COMDAT in runtime failing on Mac OS X problem once and for …
Dec 8, 2020
fbea278
Improve comment.
Dec 8, 2020
312c05e
Merge branch 'master' into gpu_context_consistency2
Dec 8, 2020
1070d3f
Merge branch 'remove_runtime_comdats_macos_ios' into gpu_context_cons…
Dec 8, 2020
b851d20
Fix tabs in indentation.
Dec 8, 2020
8b9017f
Merge branch 'remove_runtime_comdats_macos_ios' into gpu_context_cons…
Dec 8, 2020
961fd42
Merge branch 'master' into gpu_context_consistency2
Dec 8, 2020
4120026
Make GPU context handling more consistent and use a common compilation
Dec 10, 2020
3a4c606
Merge branch 'master' into gpu_context_consistency2
Dec 10, 2020
4f55416
Merge branch 'master' into gpu_context_consistency2
Dec 10, 2020
03062dd
Add CUDA finalizer method.
Dec 11, 2020
42f9ccc
Conditionalize Objective C support.
Dec 11, 2020
b634ac0
Fix clang-format complaints.
Dec 11, 2020
285750a
Fix clang-format complaints.
Dec 11, 2020
e78ce14
Attempt to fix new test failure with cmake.
Dec 11, 2020
0c0ff56
Merge branch 'gpu_context_consistency2' of https://github.com/halide/…
Dec 11, 2020
1046ccc
Add Metal support to acquire_release test and make it so it doesn't f…
Dec 14, 2020
21d39d2
Merge branch 'master' into gpu_context_consistency2
steven-johnson Dec 15, 2020
348d2bf
Merge branch 'master' into gpu_context_consistency2
steven-johnson Dec 15, 2020
7afefcb
Merge branch 'master' into gpu_context_consistency2
steven-johnson Dec 16, 2020
71b4e23
Merge branch 'master' into gpu_context_consistency2
steven-johnson Dec 16, 2020
3352bf5
Fix CMake cuda target issue. Add comment to Makefile per review feedb…
Dec 17, 2020
05c8c7c
Add a couple more locals initializations for safety.
Dec 17, 2020
1dd25f7
Remove extraneous test that somehow got moved into header file.
Dec 17, 2020
eb74724
Merge branch 'master' into gpu_context_consistency2
Dec 18, 2020
10a1388
Fix OpenCL code per erroneous use of globals.
Dec 18, 2020
61f6b17
Merge branch 'master' into gpu_context_consistency2
steven-johnson Dec 21, 2020
b6b5b57
Merge branch 'master' into gpu_context_consistency2
steven-johnson Jan 5, 2021
1962b25
Fix errors for MEtal case in acquire_release_aottest.cpp.
Jan 6, 2021
054a535
Fix formatting.
Jan 6, 2021
2163090
Merge branch 'master' into gpu_context_consistency2
steven-johnson Jan 14, 2021
cf862ce
Fix D3D runtime to work like Metal does and not reentrantly acquire
Jan 14, 2021
22e336d
Merge branch 'master' into gpu_context_consistency2
steven-johnson Jan 15, 2021
c669a85
Merge branch 'master' into gpu_context_consistency2
steven-johnson Jan 15, 2021
8156bec
Merge branch 'master' into gpu_context_consistency2
Jan 19, 2021
f911bdb
Merge branch 'master' into gpu_context_consistency2
steven-johnson Jan 19, 2021
68dbd62
Merge branch 'master' into gpu_context_consistency2
steven-johnson Jan 21, 2021
871acc3
trigger buildbots
steven-johnson Jan 25, 2021
593a59a
trigger buildbots
steven-johnson Jan 25, 2021
6b877df
trigger buildbots
steven-johnson Jan 25, 2021
0452473
Merge branch 'master' into gpu_context_consistency2
steven-johnson Jan 26, 2021
c61bf39
bugfix
Jan 26, 2021
6b79b65
trigger buildbots
steven-johnson Jan 27, 2021
3028478
Merge branch 'master' into gpu_context_consistency2
steven-johnson Jan 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,12 @@ TEST_CXX_FLAGS += -DTEST_OPENCL
endif

ifneq ($(TEST_METAL), )
TEST_CXX_FLAGS += -DTEST_METAL
# Using Metal APIs requires writing Objective-C++ (or Swift). Add ObjC++
# to allow tests to create and destroy Metal contexts, etc. This requires
# tests to be valid Objective-C++, e.g. avoiding using the identifier "id"
# in certain ways. In practice this is not enough of a problem to justify
# the work to limit which files are compiled this way.
TEST_CXX_FLAGS += -DTEST_METAL -ObjC++
endif

ifneq ($(TEST_CUDA), )
Expand Down Expand Up @@ -1192,6 +1197,8 @@ GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_async_parallel,$(GENERAT
GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_stubtest,$(GENERATOR_AOTCPP_TESTS))
GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_stubuser,$(GENERATOR_AOTCPP_TESTS))

GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_gpu_multi_context_threaded,$(GENERATOR_AOTCPP_TESTS))

test_aotcpp_generator: $(GENERATOR_AOTCPP_TESTS)

# This is just a test to ensure that RunGen builds and links for a critical mass of Generators;
Expand All @@ -1207,6 +1214,7 @@ GENERATOR_BUILD_RUNGEN_TESTS := $(filter-out $(FILTERS_DIR)/multitarget.rungen,$
GENERATOR_BUILD_RUNGEN_TESTS := $(filter-out $(FILTERS_DIR)/nested_externs.rungen,$(GENERATOR_BUILD_RUNGEN_TESTS))
GENERATOR_BUILD_RUNGEN_TESTS := $(filter-out $(FILTERS_DIR)/tiled_blur.rungen,$(GENERATOR_BUILD_RUNGEN_TESTS))
GENERATOR_BUILD_RUNGEN_TESTS := $(filter-out $(FILTERS_DIR)/extern_output.rungen,$(GENERATOR_BUILD_RUNGEN_TESTS))
GENERATOR_BUILD_RUNGEN_TESTS := $(filter-out $(FILTERS_DIR)/gpu_multi_context_threaded.rungen,$(GENERATOR_BUILD_RUNGEN_TESTS))
GENERATOR_BUILD_RUNGEN_TESTS := $(GENERATOR_BUILD_RUNGEN_TESTS) \
$(FILTERS_DIR)/multi_rungen \
$(FILTERS_DIR)/multi_rungen2 \
Expand Down Expand Up @@ -1553,6 +1561,12 @@ $(FILTERS_DIR)/nested_externs_%.a: $(BIN_DIR)/nested_externs.generator
@mkdir -p $(@D)
$(CURDIR)/$< -g nested_externs_$* $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime

# Similarly, gpu_multi_context_threaded needs two different kernels to test
# compilation caching. The single generator binary is invoked once per kernel,
# with the pattern stem selecting the generator name via -g.
# Also requires user_context, hence the -user_context feature on the target.
$(FILTERS_DIR)/gpu_multi_context_threaded_%.a: $(BIN_DIR)/gpu_multi_context_threaded.generator
@mkdir -p $(@D)
$(CURDIR)/$< -g gpu_multi_context_threaded_$* $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime-user_context

GEN_AOT_CXX_FLAGS=$(TEST_CXX_FLAGS) -Wno-unknown-pragmas
GEN_AOT_INCLUDES=-I$(INCLUDE_DIR) -I$(FILTERS_DIR) -I$(ROOT_DIR)/src/runtime -I$(ROOT_DIR)/test/common -I $(ROOT_DIR)/apps/support -I $(SRC_DIR)/runtime -I$(ROOT_DIR)/tools
GEN_AOT_LD_FLAGS=$(COMMON_LD_FLAGS)
Expand Down Expand Up @@ -1648,11 +1662,36 @@ generator_aot_multitarget: $(BIN_DIR)/$(TARGET)/generator_aot_multitarget
HL_MULTITARGET_TEST_USE_NOBOUNDSQUERY_FEATURE=1 $(CURDIR)/$<
@-echo

# gpu_multi_context_threaded has additional deps to link in: both the "add"
# and "mul" filter archives plus the standalone runtime. The link line also
# passes the OpenCL and CUDA linker flags in addition to the common AOT flags.
$(BIN_DIR)/$(TARGET)/generator_aot_gpu_multi_context_threaded: $(ROOT_DIR)/test/generator/gpu_multi_context_threaded_aottest.cpp \
$(FILTERS_DIR)/gpu_multi_context_threaded_add.a \
$(FILTERS_DIR)/gpu_multi_context_threaded_mul.a \
$(RUNTIME_EXPORTED_INCLUDES) $(BIN_DIR)/$(TARGET)/runtime.a
@mkdir -p $(@D)
$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(OPENCL_LD_FLAGS) $(CUDA_LD_FLAGS) -o $@

# C++-backend variant of the gpu_multi_context_threaded test: same test driver
# and runtime, but the "add" and "mul" filters are compiled from their
# .halide_generated.cpp sources rather than linked from .a archives.
$(BIN_DIR)/$(TARGET)/generator_aotcpp_gpu_multi_context_threaded: $(ROOT_DIR)/test/generator/gpu_multi_context_threaded_aottest.cpp \
$(FILTERS_DIR)/gpu_multi_context_threaded_add.halide_generated.cpp \
$(FILTERS_DIR)/gpu_multi_context_threaded_mul.halide_generated.cpp \
$(RUNTIME_EXPORTED_INCLUDES) $(BIN_DIR)/$(TARGET)/runtime.a
@mkdir -p $(@D)
$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(OPENCL_LD_FLAGS) $(CUDA_LD_FLAGS) -o $@

# nested_externs doesn't actually contain a generator named
# "nested_externs", and has no internal tests in any case, so this
# target only prints a message.
test_generator_nested_externs:
@echo "Skipping"

# gpu_multi doesn't actually contain a generator named
# "gpu_multi", and has no internal tests in any case, so this
# target only prints a message.
test_generator_gpu_multi:
@echo "Skipping"

# gpu_multi_context_threaded doesn't actually contain a generator named
# "gpu_multi_context_threaded" (its generators carry the _add and _mul
# suffixes), and has no internal tests in any case, so this target only
# prints a message.
test_generator_gpu_multi_context_threaded:
@echo "Skipping"

$(BUILD_DIR)/RunGenMain.o: $(ROOT_DIR)/tools/RunGenMain.cpp $(RUNTIME_EXPORTED_INCLUDES) $(ROOT_DIR)/tools/RunGen.h
@mkdir -p $(@D)
$(CXX) -c $< $(filter-out -g, $(TEST_CXX_FLAGS)) $(OPTIMIZE) -Os $(IMAGE_IO_CXX_FLAGS) -I$(INCLUDE_DIR) -I $(SRC_DIR)/runtime -I$(ROOT_DIR)/tools -o $@
Expand Down
2 changes: 1 addition & 1 deletion src/CodeGen_C.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1586,7 +1586,7 @@ void CodeGen_C::compile(const LoweredFunc &f) {

if (uses_gpu_for_loops) {
stream << get_indent() << "halide_error("
<< (have_user_context ? "__user_context_" : "nullptr")
<< (have_user_context ? "const_cast<void *>(__user_context)" : "nullptr")
<< ", \"C++ Backend does not support gpu_blocks() or gpu_threads() yet, "
<< "this function will always fail at runtime\");\n";
stream << get_indent() << "return halide_error_code_device_malloc_failed;\n";
Expand Down
7 changes: 7 additions & 0 deletions src/CodeGen_GPU_Host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,13 @@ void CodeGen_GPU_Host<CodeGen_CPU>::compile_func(const LoweredFunc &f,
Value *result = builder->CreateCall(init, init_kernels_args);
Value *did_succeed = builder->CreateICmpEQ(result, ConstantInt::get(i32_t, 0));
CodeGen_CPU::create_assertion(did_succeed, Expr(), result);

// Generate a finalizer call as well to release any refcounts or other resource usage
// specific to this filter call.
std::string finalize_kernels_name = "halide_" + api_unique_name + "_finalize_kernels";
llvm::Function *finalize = module->getFunction(finalize_kernels_name);
Value *module_state_value = builder->CreateLoad(module_state);
register_destructor(finalize, module_state_value, CodeGen_CPU::Always);
}

// the init kernels block should branch to the post-entry block
Expand Down
1 change: 1 addition & 0 deletions src/runtime/HalideRuntimeCuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ extern int halide_cuda_run(void *user_context,
float *vertex_buffer,
int num_coords_dim0,
int num_coords_dim1);
extern void halide_cuda_finalize_kernels(void *user_context, void *state_ptr);
// @}

/** Set the underlying cuda device pointer for a buffer. The device
Expand Down
1 change: 1 addition & 0 deletions src/runtime/HalideRuntimeD3D12Compute.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ extern int halide_d3d12compute_run(void *user_context,
float *vertex_buffer,
int num_coords_dim0,
int num_coords_dim1);
extern void halide_d3d12compute_finalize_kernels(void *user_context, void *state_ptr);
// @}

/** Set the underlying ID3D12Resource for a halide_buffer_t. The memory backing
Expand Down
1 change: 1 addition & 0 deletions src/runtime/HalideRuntimeHexagonHost.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ extern int halide_hexagon_run(void *user_context,
uint64_t arg_sizes[],
void *args[],
int arg_flags[]);
extern void halide_hexagon_finalize_kernels(void *user_context, void *state_ptr);
extern int halide_hexagon_device_release(void *user_context);
// @}

Expand Down
1 change: 1 addition & 0 deletions src/runtime/HalideRuntimeMetal.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ extern const struct halide_device_interface_t *halide_metal_device_interface();
// @{
extern int halide_metal_initialize_kernels(void *user_context, void **state_ptr,
const char *src, int size);
// Counterpart to halide_metal_initialize_kernels; takes the state pointer by
// value. Declared extern to match the neighbouring runtime entry points.
extern void halide_metal_finalize_kernels(void *user_context, void *state_ptr);

extern int halide_metal_run(void *user_context,
void *state_ptr,
Expand Down
1 change: 1 addition & 0 deletions src/runtime/HalideRuntimeOpenCL.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ extern int halide_opencl_run(void *user_context,
float *vertex_buffer,
int num_coords_dim0,
int num_coords_dim1);
extern void halide_opencl_finalize_kernels(void *user_context, void *state_ptr);
// @}

/** Set the platform name for OpenCL to use (e.g. "Intel" or
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/HalideRuntimeOpenGLCompute.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ extern int halide_openglcompute_run(void *user_context,
float *vertex_buffer,
int num_coords_dim0,
int num_coords_dim1);

extern void halide_openglcompute_finalize_kernels(void *user_context, void *state_ptr);
// @}

/** This function retrieves pointers to OpenGL API functions.
Expand Down
Loading