diff --git a/runtime/kernel/targets.bzl b/runtime/kernel/targets.bzl index e67f76728b8..5c95f10276d 100644 --- a/runtime/kernel/targets.bzl +++ b/runtime/kernel/targets.bzl @@ -56,6 +56,7 @@ def define_common_targets(): exported_headers = ["thread_parallel_interface.h"], exported_deps = [ "//executorch/runtime/core:core", + "//executorch/runtime/core/portable_type/c10/c10:c10", "//executorch/runtime/platform:platform", ], visibility = [ diff --git a/runtime/kernel/thread_parallel_interface.h b/runtime/kernel/thread_parallel_interface.h index 1e79acc75a4..52100475c7b 100644 --- a/runtime/kernel/thread_parallel_interface.h +++ b/runtime/kernel/thread_parallel_interface.h @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -29,7 +30,17 @@ inline bool parallel_for_no_threadpool( begin, end); ET_CHECK_OR_RETURN_FALSE(grain_size > 0, "grain_size = %" PRId64, grain_size); +#ifndef NDEBUG + // Go backwards through the range elementwise to catch code that + // assumes parallel_for is in order like a regular for loop. + for (const auto i : c10::irange(begin, end)) { + const auto offset = i - begin; + const auto idx = end - offset - 1; + f(idx, idx + 1); + } +#else // NDEBUG f(begin, end); +#endif return true; }