diff --git a/src/operator/tensor/broadcast_reduce-inl.h b/src/operator/tensor/broadcast_reduce-inl.h index 1907c02897c9..13fadbf41918 100644 --- a/src/operator/tensor/broadcast_reduce-inl.h +++ b/src/operator/tensor/broadcast_reduce-inl.h @@ -360,10 +360,17 @@ void seq_reduce_compute(const size_t N, const size_t M, const bool addto, const Shape sshape, const Shape rshape, const Shape rstride) { const int thread_count = engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); - #pragma omp parallel for num_threads(thread_count) if (N >= thread_count) - for (index_t idx = 0; idx < static_cast(N); ++idx) { - seq_reduce_assign - (idx, M, addto, big, small, bshape, sshape, rshape, rstride, N < thread_count); + if (N >= thread_count) { + #pragma omp parallel for num_threads(thread_count) + for (index_t idx = 0; idx < static_cast(N); ++idx) { + seq_reduce_assign + (idx, M, addto, big, small, bshape, sshape, rshape, rstride, false); + } + } else { + for (index_t idx = 0; idx < static_cast(N); ++idx) { + seq_reduce_assign + (idx, M, addto, big, small, bshape, sshape, rshape, rstride, true); + } } }