diff --git a/apps/onnx/model.cpp b/apps/onnx/model.cpp
index b2d1738d3680..eb7327974612 100644
--- a/apps/onnx/model.cpp
+++ b/apps/onnx/model.cpp
@@ -344,8 +344,6 @@ std::vector<py::array> run(
     }
     Halide::Realization real(outputs);
     Halide::Target tgt = Halide::get_host_target();
-    // Don't allow LLVM to mess with the code.
-    tgt.set_feature(Halide::Target::DisableLLVMLoopOpt, true);
     // Don't create buffers larger than 2GB since we use 32bit signed indices to
     // index the data stored in them.
     tgt.set_feature(Halide::Target::LargeBuffers, false);
@@ -461,8 +459,6 @@ double benchmark(
 
     Halide::Realization real(outputs);
     Halide::Target tgt = Halide::get_host_target();
-    // Don't allow LLVM to mess with the code.
-    tgt.set_feature(Halide::Target::DisableLLVMLoopOpt, true);
     // Don't create buffers larger than 2GB since we use 32bit signed indices to
     // index the data stored in them.
     tgt.set_feature(Halide::Target::LargeBuffers, false);
diff --git a/python_bindings/src/PyEnums.cpp b/python_bindings/src/PyEnums.cpp
index f232b43c4c59..ce585f7ef06b 100644
--- a/python_bindings/src/PyEnums.cpp
+++ b/python_bindings/src/PyEnums.cpp
@@ -140,7 +140,6 @@ void define_enums(py::module &m) {
         .value("HexagonDma", Target::Feature::HexagonDma)
         .value("EmbedBitcode", Target::Feature::EmbedBitcode)
         .value("EnableLLVMLoopOpt", Target::Feature::EnableLLVMLoopOpt)
-        .value("DisableLLVMLoopOpt", Target::Feature::DisableLLVMLoopOpt)
         .value("WasmSimd128", Target::Feature::WasmSimd128)
         .value("WasmSignExt", Target::Feature::WasmSignExt)
         .value("WasmSatFloatToInt", Target::Feature::WasmSatFloatToInt)
diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp
index d408f1dea135..d3e782cbca5d 100644
--- a/src/CodeGen_LLVM.cpp
+++ b/src/CodeGen_LLVM.cpp
@@ -1081,14 +1081,7 @@ void CodeGen_LLVM::optimize_module() {
 
     std::unique_ptr<TargetMachine> tm = make_target_machine(*module);
 
-    // At present, we default to *enabling* LLVM loop optimization,
-    // unless DisableLLVMLoopOpt is set; we're going to flip this to defaulting
-    // to *not* enabling these optimizations (and removing the DisableLLVMLoopOpt feature).
-    // See https://github.com/halide/Halide/issues/4113 for more info.
-    // (Note that setting EnableLLVMLoopOpt always enables loop opt, regardless
-    // of the setting of DisableLLVMLoopOpt.)
-    const bool do_loop_opt = !get_target().has_feature(Target::DisableLLVMLoopOpt) ||
-                             get_target().has_feature(Target::EnableLLVMLoopOpt);
+    const bool do_loop_opt = get_target().has_feature(Target::EnableLLVMLoopOpt);
 
     PipelineTuningOptions pto;
     pto.LoopInterleaving = do_loop_opt;
diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp
index d1c3dd757de0..d43f83df9e31 100644
--- a/src/CodeGen_PTX_Dev.cpp
+++ b/src/CodeGen_PTX_Dev.cpp
@@ -667,20 +667,17 @@ vector<char> CodeGen_PTX_Dev::compile_to_src() {
         }
     }
 
-    // At present, we default to *enabling* LLVM loop optimization,
-    // unless DisableLLVMLoopOpt is set; we're going to flip this to defaulting
-    // to *not* enabling these optimizations (and removing the DisableLLVMLoopOpt feature).
-    // See https://github.com/halide/Halide/issues/4113 for more info.
-    // (Note that setting EnableLLVMLoopOpt always enables loop opt, regardless
-    // of the setting of DisableLLVMLoopOpt.)
-    const bool do_loop_opt = !target.has_feature(Target::DisableLLVMLoopOpt) ||
-                             target.has_feature(Target::EnableLLVMLoopOpt);
+    const bool do_loop_opt = target.has_feature(Target::EnableLLVMLoopOpt);
 
     PassManagerBuilder b;
     b.OptLevel = 3;
     b.Inliner = createFunctionInliningPass(b.OptLevel, 0, false);
     b.LoopVectorize = do_loop_opt;
     b.SLPVectorize = true;
+    // Known hazard: DisableUnrollLoops = true can occasionally generate PTX code
+    // that fails at runtime under some conditions (e.g. correctness_gpu_dynamic_shared
+    // using NVidia driver 460.x). The assignment below still yields true whenever
+    // EnableLLVMLoopOpt is unset; the alternative is `b.DisableUnrollLoops = false;`.
     b.DisableUnrollLoops = !do_loop_opt;
 
     target_machine->adjustPassManager(b);
diff --git a/src/HexagonOffload.cpp b/src/HexagonOffload.cpp
index 9d4512ce3d0d..b0fdac95a741 100644
--- a/src/HexagonOffload.cpp
+++ b/src/HexagonOffload.cpp
@@ -977,7 +977,6 @@ Stmt inject_hexagon_rpc(Stmt s, const Target &host_target,
         Target::HVX_v62,
         Target::HVX_v65,
         Target::HVX_v66,
-        Target::DisableLLVMLoopOpt,
     };
     for (Target::Feature i : shared_features) {
         if (host_target.has_feature(i)) {
diff --git a/src/Target.cpp b/src/Target.cpp
index 4e21db617f68..2a838cee3010 100644
--- a/src/Target.cpp
+++ b/src/Target.cpp
@@ -370,7 +370,6 @@ const std::map<std::string, Target::Feature> feature_name_map = {
     {"check_unsafe_promises", Target::CheckUnsafePromises},
     {"hexagon_dma", Target::HexagonDma},
     {"embed_bitcode", Target::EmbedBitcode},
-    {"disable_llvm_loop_opt", Target::DisableLLVMLoopOpt},
     {"enable_llvm_loop_opt", Target::EnableLLVMLoopOpt},
     {"wasm_simd128", Target::WasmSimd128},
     {"wasm_signext", Target::WasmSignExt},
diff --git a/src/Target.h b/src/Target.h
index 1f4e55bc7b55..90cf1d61fed1 100644
--- a/src/Target.h
+++ b/src/Target.h
@@ -117,7 +117,6 @@ struct Target {
         CheckUnsafePromises = halide_target_feature_check_unsafe_promises,
         EmbedBitcode = halide_target_feature_embed_bitcode,
         EnableLLVMLoopOpt = halide_target_feature_enable_llvm_loop_opt,
-        DisableLLVMLoopOpt = halide_target_feature_disable_llvm_loop_opt,
         WasmSimd128 = halide_target_feature_wasm_simd128,
         WasmSignExt = halide_target_feature_wasm_signext,
         WasmSatFloatToInt = halide_target_feature_wasm_sat_float_to_int,
diff --git a/src/autoschedulers/adams2019/autotune_loop.sh b/src/autoschedulers/adams2019/autotune_loop.sh
index d36830f71249..b11aaa1d24ab 100755
--- a/src/autoschedulers/adams2019/autotune_loop.sh
+++ b/src/autoschedulers/adams2019/autotune_loop.sh
@@ -65,14 +65,6 @@ else
     echo Copying starting weights from ${START_WEIGHTS_FILE} to ${WEIGHTS}
 fi
 
-# We could add this unconditionally, but it's easier to wade thru
-# results if we only add if needed
-for F in disable_llvm_loop_opt; do
-    if [[ ! ${HL_TARGET} =~ .*${F}.* ]]; then
-        HL_TARGET="${HL_TARGET}-${F}"
-    fi
-done
-
 # A batch of this many samples is built in parallel, and then
 # benchmarked serially.
 BATCH_SIZE=32
diff --git a/src/runtime/HalideRuntime.h b/src/runtime/HalideRuntime.h
index 6496f1eebc58..fec3ffd7c252 100644
--- a/src/runtime/HalideRuntime.h
+++ b/src/runtime/HalideRuntime.h
@@ -1323,8 +1323,7 @@ typedef enum halide_target_feature_t {
     halide_target_feature_check_unsafe_promises,  ///< Insert assertions for promises.
     halide_target_feature_hexagon_dma,            ///< Enable Hexagon DMA buffers.
     halide_target_feature_embed_bitcode,          ///< Emulate clang -fembed-bitcode flag.
-    halide_target_feature_enable_llvm_loop_opt,   ///< Enable loop vectorization + unrolling in LLVM. Overrides halide_target_feature_disable_llvm_loop_opt. (Ignored for non-LLVM targets.)
-    halide_target_feature_disable_llvm_loop_opt,  ///< Disable loop vectorization + unrolling in LLVM. (Ignored for non-LLVM targets.)
+    halide_target_feature_enable_llvm_loop_opt,   ///< Enable loop vectorization + unrolling in LLVM.
     halide_target_feature_wasm_simd128,           ///< Enable +simd128 instructions for WebAssembly codegen.
     halide_target_feature_wasm_signext,           ///< Enable +sign-ext instructions for WebAssembly codegen.
     halide_target_feature_wasm_sat_float_to_int,  ///< Enable saturating (nontrapping) float-to-int instructions for WebAssembly codegen.
diff --git a/test/correctness/float16_t.cpp b/test/correctness/float16_t.cpp
index 7962af423273..21d8f082a59d 100644
--- a/test/correctness/float16_t.cpp
+++ b/test/correctness/float16_t.cpp
@@ -238,7 +238,7 @@ int main(int argc, char **argv) {
         to_f16.compute_root().vectorize(x, 8, TailStrategy::RoundUp);
         from_f16.compute_root().vectorize(x, 8, TailStrategy::RoundUp);
 
-        from_f16.compile_to_assembly("/dev/stdout", {}, Target("host-no_asserts-no_bounds_query-no_runtime-disable_llvm_loop_unroll-disable_llvm_loop_vectorize"));
+        from_f16.compile_to_assembly("/dev/stdout", {}, Target("host-no_asserts-no_bounds_query-no_runtime"));
     }
 
     // Check infinity handling for both float16_t and Halide codegen.
diff --git a/test/correctness/simd_op_check.h b/test/correctness/simd_op_check.h
index 29f151e10455..df343bd0d7aa 100644
--- a/test/correctness/simd_op_check.h
+++ b/test/correctness/simd_op_check.h
@@ -51,8 +51,7 @@ class SimdOpCheckTest {
         target = target
                      .with_feature(Target::NoBoundsQuery)
                      .with_feature(Target::NoAsserts)
-                     .with_feature(Target::NoRuntime)
-                     .with_feature(Target::DisableLLVMLoopOpt);
+                     .with_feature(Target::NoRuntime);
         num_threads = Internal::ThreadPool<void>::num_processors_online();
     }
     virtual ~SimdOpCheckTest() = default;
diff --git a/test/performance/nested_vectorization_gemm.cpp b/test/performance/nested_vectorization_gemm.cpp
index 25a0bc746fb1..88904879dfab 100644
--- a/test/performance/nested_vectorization_gemm.cpp
+++ b/test/performance/nested_vectorization_gemm.cpp
@@ -10,9 +10,6 @@ int main(int argc, char **argv) {
         printf("[SKIP] Performance tests are meaningless and/or misleading under WebAssembly interpreter.\n");
         return 0;
     }
-    // We don't want to be sensitive to LLVM pulling the same tricks
-    // or not.
-    target.set_feature(Target::DisableLLVMLoopOpt);
 
     // 8-bit mat-mul into 32-bit accumulator
     {