From 648388f7f900c63cbb6d8c182e85c188a2249312 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Tue, 1 Feb 2022 23:39:33 -0500 Subject: [PATCH 1/2] Don't use `@_inline_meta` --- benchmark/bench_foreach_seq_sum_many.jl | 16 ++++++---------- src/foreach.jl | 10 +++++----- src/map.jl | 6 ++---- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/benchmark/bench_foreach_seq_sum_many.jl b/benchmark/bench_foreach_seq_sum_many.jl index f8b979e1..fa49249c 100644 --- a/benchmark/bench_foreach_seq_sum_many.jl +++ b/benchmark/bench_foreach_seq_sum_many.jl @@ -16,23 +16,19 @@ using ThreadsX @noinline function manual_sum_many!(vecs::NTuple{N,AbstractArray}) where {N} @simd ivdep for i in eachindex(vecs...) sums = cumsumargs(map(a -> (@inbounds a[i]), vecs)...) - ntuple(Val(N)) do j - Base.@_inline_meta - @inbounds vecs[j][i] = sums[j] - end + @inline update!(j) = @inbounds vecs[j][i] = sums[j] + ntuple(update!, Val(N)) end end @noinline foreach_sum_many!(vecs::NTuple{N,AbstractArray}, simd) where {N} = let nvecs = Val(N) - ThreadsX.Implementations.foreach_linear_seq(eachindex(vecs...), simd) do i - Base.@_inline_meta + @inline function loop_body!(j) sums = cumsumargs(map(a -> (@inbounds a[i]), vecs)...) - ntuple(nvecs) do j - Base.@_inline_meta - @inbounds vecs[j][i] = sums[j] - end + @inline update!(j) = @inbounds vecs[j][i] = sums[j] + ntuple(update!, Val(N)) end + ThreadsX.Implementations.foreach_linear_seq(loop_body!, eachindex(vecs...), simd) end suite = BenchmarkGroup() diff --git a/src/foreach.jl b/src/foreach.jl index cdca7a15..4939981f 100644 --- a/src/foreach.jl +++ b/src/foreach.jl @@ -97,16 +97,16 @@ for simd in [false, true, :ivdep] ) where {F,N,M} = $(_simdify_if(simd, body)) end -ThreadsX.foreach( +function ThreadsX.foreach( f, array::AbstractArray{<:Any,N}, arrays::AbstractArray{<:Any,N}...; kw..., -) where {N} = - ThreadsX.foreach(eachindex(array, arrays...); kw...) do i - Base.@_inline_meta +) where {N} + @inline foreach_body!(i) = f((@inbounds array[i]), map(x -> (@inbounds x[i]), arrays)...) - end + ThreadsX.foreach(foreach_body!, eachindex(array, arrays...); kw...) +end #= ThreadsX.foreach(f, array::AbstractArray, arrays::AbstractArray; kw...) = diff --git a/src/map.jl b/src/map.jl index 0958d402..ff86e478 100644 --- a/src/map.jl +++ b/src/map.jl @@ -32,10 +32,8 @@ function ThreadsX.map( end function ThreadsX.map!(f, dest, array, arrays...; kw...) - ThreadsX.foreach(referenceable(dest), array, arrays...; kw...) do y, xs... - Base.@_inline_meta - y[] = f(xs...) - end + @inline map_body!(y, xs...) = y[] = f(xs...) + ThreadsX.foreach(map_body!, referenceable(dest), array, arrays...; kw...) return dest end From 2a1c3aeff7eb8bc3096e2256f6bf9883206a9ac7 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Wed, 2 Feb 2022 04:46:00 -0500 Subject: [PATCH 2/2] Fix benchmark --- benchmark/bench_foreach_seq_sum_many.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/bench_foreach_seq_sum_many.jl b/benchmark/bench_foreach_seq_sum_many.jl index fa49249c..f4b8cf84 100644 --- a/benchmark/bench_foreach_seq_sum_many.jl +++ b/benchmark/bench_foreach_seq_sum_many.jl @@ -23,10 +23,10 @@ end @noinline foreach_sum_many!(vecs::NTuple{N,AbstractArray}, simd) where {N} = let nvecs = Val(N) - @inline function loop_body!(j) + @inline function loop_body!(i) sums = cumsumargs(map(a -> (@inbounds a[i]), vecs)...) @inline update!(j) = @inbounds vecs[j][i] = sums[j] - ntuple(update!, Val(N)) + ntuple(update!, nvecs) end ThreadsX.Implementations.foreach_linear_seq(loop_body!, eachindex(vecs...), simd) end