Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
f1507af
Disable LLVM loop optimization by default (Issue #4113)
steven-johnson Jun 9, 2020
d4b70eb
Merge branch 'master' into srj-llvm-loop-opt
steven-johnson Jul 29, 2020
356125d
Merge branch 'master' into srj-llvm-loop-opt
steven-johnson Apr 8, 2021
5f231c7
Remove dead usages of disable_llvm_loop_opt
steven-johnson Apr 8, 2021
0b18efb
trigger buildbots
steven-johnson Apr 8, 2021
ee6fb08
Update CodeGen_PTX_Dev.cpp
steven-johnson Apr 13, 2021
ec41a98
Merge branch 'master' into srj-llvm-loop-opt
steven-johnson Apr 13, 2021
751c9ea
Update CodeGen_PTX_Dev.cpp
steven-johnson Apr 20, 2021
41094ec
Merge branch 'master' into srj-llvm-loop-opt
steven-johnson Apr 28, 2021
ccdc168
Merge branch 'master' into srj-llvm-loop-opt
steven-johnson May 6, 2021
79bf468
Merge branch 'master' into srj-llvm-loop-opt
steven-johnson May 11, 2021
bea405a
Merge branch 'master' into srj-llvm-loop-opt
steven-johnson May 13, 2021
f73bd8a
Merge branch 'master' into srj-llvm-loop-opt
steven-johnson May 25, 2021
5b5f418
Add debugging info
steven-johnson May 25, 2021
e287112
more debugging
steven-johnson May 25, 2021
0b1eb79
still more debugging
steven-johnson May 26, 2021
0b1cb0d
yep more
steven-johnson May 26, 2021
43178e3
still more
steven-johnson May 26, 2021
de4d4d7
Update CMakeLists.txt
steven-johnson May 26, 2021
36eae97
Update CMakeLists.txt
steven-johnson May 26, 2021
beb795f
Update cuda.cpp
steven-johnson May 26, 2021
dee41ca
Update cuda.cpp
steven-johnson May 26, 2021
d878aea
Update cuda.cpp
steven-johnson May 26, 2021
7b6be78
Revert nonsense
steven-johnson May 26, 2021
2ca7e4e
Merge branch 'master' into srj-llvm-loop-opt
steven-johnson Jun 3, 2021
21eef77
Merge branch 'master' into srj-llvm-loop-opt
steven-johnson Jun 29, 2021
657cb56
Merge branch 'master' into srj-llvm-loop-opt
steven-johnson Aug 5, 2021
5999356
Merge branch 'master' into srj-llvm-loop-opt
steven-johnson Aug 6, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions apps/onnx/model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,6 @@ std::vector<py::array> run(
}
Halide::Realization real(outputs);
Halide::Target tgt = Halide::get_host_target();
// Don't allow LLVM to mess with the code.
tgt.set_feature(Halide::Target::DisableLLVMLoopOpt, true);
// Don't create buffers larger than 2GB since we use 32bit signed indices to
// index the data stored in them.
tgt.set_feature(Halide::Target::LargeBuffers, false);
Expand Down Expand Up @@ -461,8 +459,6 @@ double benchmark(

Halide::Realization real(outputs);
Halide::Target tgt = Halide::get_host_target();
// Don't allow LLVM to mess with the code.
tgt.set_feature(Halide::Target::DisableLLVMLoopOpt, true);
// Don't create buffers larger than 2GB since we use 32bit signed indices to
// index the data stored in them.
tgt.set_feature(Halide::Target::LargeBuffers, false);
Expand Down
1 change: 0 additions & 1 deletion python_bindings/src/PyEnums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ void define_enums(py::module &m) {
.value("HexagonDma", Target::Feature::HexagonDma)
.value("EmbedBitcode", Target::Feature::EmbedBitcode)
.value("EnableLLVMLoopOpt", Target::Feature::EnableLLVMLoopOpt)
.value("DisableLLVMLoopOpt", Target::Feature::DisableLLVMLoopOpt)
.value("WasmSimd128", Target::Feature::WasmSimd128)
.value("WasmSignExt", Target::Feature::WasmSignExt)
.value("WasmSatFloatToInt", Target::Feature::WasmSatFloatToInt)
Expand Down
9 changes: 1 addition & 8 deletions src/CodeGen_LLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1081,14 +1081,7 @@ void CodeGen_LLVM::optimize_module() {

std::unique_ptr<TargetMachine> tm = make_target_machine(*module);

// At present, we default to *enabling* LLVM loop optimization,
// unless DisableLLVMLoopOpt is set; we're going to flip this to defaulting
// to *not* enabling these optimizations (and removing the DisableLLVMLoopOpt feature).
// See https://github.com/halide/Halide/issues/4113 for more info.
// (Note that setting EnableLLVMLoopOpt always enables loop opt, regardless
// of the setting of DisableLLVMLoopOpt.)
const bool do_loop_opt = !get_target().has_feature(Target::DisableLLVMLoopOpt) ||
get_target().has_feature(Target::EnableLLVMLoopOpt);
const bool do_loop_opt = get_target().has_feature(Target::EnableLLVMLoopOpt);

PipelineTuningOptions pto;
pto.LoopInterleaving = do_loop_opt;
Expand Down
13 changes: 5 additions & 8 deletions src/CodeGen_PTX_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -667,20 +667,17 @@ vector<char> CodeGen_PTX_Dev::compile_to_src() {
}
}

// At present, we default to *enabling* LLVM loop optimization,
// unless DisableLLVMLoopOpt is set; we're going to flip this to defaulting
// to *not* enabling these optimizations (and removing the DisableLLVMLoopOpt feature).
// See https://github.com/halide/Halide/issues/4113 for more info.
// (Note that setting EnableLLVMLoopOpt always enables loop opt, regardless
// of the setting of DisableLLVMLoopOpt.)
const bool do_loop_opt = !target.has_feature(Target::DisableLLVMLoopOpt) ||
target.has_feature(Target::EnableLLVMLoopOpt);
const bool do_loop_opt = target.has_feature(Target::EnableLLVMLoopOpt);

PassManagerBuilder b;
b.OptLevel = 3;
b.Inliner = createFunctionInliningPass(b.OptLevel, 0, false);
b.LoopVectorize = do_loop_opt;
b.SLPVectorize = true;
// Setting DisableUnrollLoops = true can occasionally generate PTX code that
// will fail at runtime under some conditions (e.g. correctness_gpu_dynamic_shared
// using NVidia driver 460.x).
// b.DisableUnrollLoops = false; // !do_loop_opt;
b.DisableUnrollLoops = !do_loop_opt;

target_machine->adjustPassManager(b);
Expand Down
1 change: 0 additions & 1 deletion src/HexagonOffload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -977,7 +977,6 @@ Stmt inject_hexagon_rpc(Stmt s, const Target &host_target,
Target::HVX_v62,
Target::HVX_v65,
Target::HVX_v66,
Target::DisableLLVMLoopOpt,
};
for (Target::Feature i : shared_features) {
if (host_target.has_feature(i)) {
Expand Down
1 change: 0 additions & 1 deletion src/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,6 @@ const std::map<std::string, Target::Feature> feature_name_map = {
{"check_unsafe_promises", Target::CheckUnsafePromises},
{"hexagon_dma", Target::HexagonDma},
{"embed_bitcode", Target::EmbedBitcode},
{"disable_llvm_loop_opt", Target::DisableLLVMLoopOpt},
{"enable_llvm_loop_opt", Target::EnableLLVMLoopOpt},
{"wasm_simd128", Target::WasmSimd128},
{"wasm_signext", Target::WasmSignExt},
Expand Down
1 change: 0 additions & 1 deletion src/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ struct Target {
CheckUnsafePromises = halide_target_feature_check_unsafe_promises,
EmbedBitcode = halide_target_feature_embed_bitcode,
EnableLLVMLoopOpt = halide_target_feature_enable_llvm_loop_opt,
DisableLLVMLoopOpt = halide_target_feature_disable_llvm_loop_opt,
WasmSimd128 = halide_target_feature_wasm_simd128,
WasmSignExt = halide_target_feature_wasm_signext,
WasmSatFloatToInt = halide_target_feature_wasm_sat_float_to_int,
Expand Down
8 changes: 0 additions & 8 deletions src/autoschedulers/adams2019/autotune_loop.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,6 @@ else
echo Copying starting weights from ${START_WEIGHTS_FILE} to ${WEIGHTS}
fi

# We could add this unconditionally, but it's easier to wade thru
# results if we only add if needed
for F in disable_llvm_loop_opt; do
if [[ ! ${HL_TARGET} =~ .*${F}.* ]]; then
HL_TARGET="${HL_TARGET}-${F}"
fi
done

# A batch of this many samples is built in parallel, and then
# benchmarked serially.
BATCH_SIZE=32
Expand Down
3 changes: 1 addition & 2 deletions src/runtime/HalideRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -1323,8 +1323,7 @@ typedef enum halide_target_feature_t {
halide_target_feature_check_unsafe_promises, ///< Insert assertions for promises.
halide_target_feature_hexagon_dma, ///< Enable Hexagon DMA buffers.
halide_target_feature_embed_bitcode, ///< Emulate clang -fembed-bitcode flag.
halide_target_feature_enable_llvm_loop_opt, ///< Enable loop vectorization + unrolling in LLVM. Overrides halide_target_feature_disable_llvm_loop_opt. (Ignored for non-LLVM targets.)
halide_target_feature_disable_llvm_loop_opt, ///< Disable loop vectorization + unrolling in LLVM. (Ignored for non-LLVM targets.)
halide_target_feature_enable_llvm_loop_opt, ///< Enable loop vectorization + unrolling in LLVM.
halide_target_feature_wasm_simd128, ///< Enable +simd128 instructions for WebAssembly codegen.
halide_target_feature_wasm_signext, ///< Enable +sign-ext instructions for WebAssembly codegen.
halide_target_feature_wasm_sat_float_to_int, ///< Enable saturating (nontrapping) float-to-int instructions for WebAssembly codegen.
Expand Down
2 changes: 1 addition & 1 deletion test/correctness/float16_t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ int main(int argc, char **argv) {
to_f16.compute_root().vectorize(x, 8, TailStrategy::RoundUp);
from_f16.compute_root().vectorize(x, 8, TailStrategy::RoundUp);

from_f16.compile_to_assembly("/dev/stdout", {}, Target("host-no_asserts-no_bounds_query-no_runtime-disable_llvm_loop_unroll-disable_llvm_loop_vectorize"));
from_f16.compile_to_assembly("/dev/stdout", {}, Target("host-no_asserts-no_bounds_query-no_runtime"));
}

// Check infinity handling for both float16_t and Halide codegen.
Expand Down
3 changes: 1 addition & 2 deletions test/correctness/simd_op_check.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ class SimdOpCheckTest {
target = target
.with_feature(Target::NoBoundsQuery)
.with_feature(Target::NoAsserts)
.with_feature(Target::NoRuntime)
.with_feature(Target::DisableLLVMLoopOpt);
.with_feature(Target::NoRuntime);
num_threads = Internal::ThreadPool<void>::num_processors_online();
}
virtual ~SimdOpCheckTest() = default;
Expand Down
3 changes: 0 additions & 3 deletions test/performance/nested_vectorization_gemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@ int main(int argc, char **argv) {
printf("[SKIP] Performance tests are meaningless and/or misleading under WebAssembly interpreter.\n");
return 0;
}
// We don't want to be sensitive to LLVM pulling the same tricks
// or not.
target.set_feature(Target::DisableLLVMLoopOpt);

// 8-bit mat-mul into 32-bit accumulator
{
Expand Down