diff --git a/benchmark/bench_foreach_seq_sum_many.jl b/benchmark/bench_foreach_seq_sum_many.jl index f8b979e1..f4b8cf84 100644 --- a/benchmark/bench_foreach_seq_sum_many.jl +++ b/benchmark/bench_foreach_seq_sum_many.jl @@ -16,23 +16,19 @@ using ThreadsX @noinline function manual_sum_many!(vecs::NTuple{N,AbstractArray}) where {N} @simd ivdep for i in eachindex(vecs...) sums = cumsumargs(map(a -> (@inbounds a[i]), vecs)...) - ntuple(Val(N)) do j - Base.@_inline_meta - @inbounds vecs[j][i] = sums[j] - end + @inline update!(j) = @inbounds vecs[j][i] = sums[j] + ntuple(update!, Val(N)) end end @noinline foreach_sum_many!(vecs::NTuple{N,AbstractArray}, simd) where {N} = let nvecs = Val(N) - ThreadsX.Implementations.foreach_linear_seq(eachindex(vecs...), simd) do i - Base.@_inline_meta + @inline function loop_body!(i) sums = cumsumargs(map(a -> (@inbounds a[i]), vecs)...) - ntuple(nvecs) do j - Base.@_inline_meta - @inbounds vecs[j][i] = sums[j] - end + @inline update!(j) = @inbounds vecs[j][i] = sums[j] + ntuple(update!, nvecs) end + ThreadsX.Implementations.foreach_linear_seq(loop_body!, eachindex(vecs...), simd) end suite = BenchmarkGroup() diff --git a/src/foreach.jl b/src/foreach.jl index cdca7a15..4939981f 100644 --- a/src/foreach.jl +++ b/src/foreach.jl @@ -97,16 +97,16 @@ for simd in [false, true, :ivdep] ) where {F,N,M} = $(_simdify_if(simd, body)) end -ThreadsX.foreach( +function ThreadsX.foreach( f, array::AbstractArray{<:Any,N}, arrays::AbstractArray{<:Any,N}...; kw..., -) where {N} = - ThreadsX.foreach(eachindex(array, arrays...); kw...) do i - Base.@_inline_meta +) where {N} + @inline foreach_body!(i) = f((@inbounds array[i]), map(x -> (@inbounds x[i]), arrays)...) - end + ThreadsX.foreach(foreach_body!, eachindex(array, arrays...); kw...) +end #= ThreadsX.foreach(f, array::AbstractArray, arrays::AbstractArray; kw...) = diff --git a/src/map.jl b/src/map.jl index 0958d402..ff86e478 100644 --- a/src/map.jl +++ b/src/map.jl @@ -32,10 +32,8 @@ function ThreadsX.map( end function ThreadsX.map!(f, dest, array, arrays...; kw...) - ThreadsX.foreach(referenceable(dest), array, arrays...; kw...) do y, xs... - Base.@_inline_meta - y[] = f(xs...) - end + @inline map_body!(y, xs...) = y[] = f(xs...) + ThreadsX.foreach(map_body!, referenceable(dest), array, arrays...; kw...) return dest end