Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pixie.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ requires "vmath >= 1.1.4"
requires "chroma >= 0.2.6"
requires "zippy >= 0.10.3"
requires "flatty >= 0.3.4"
requires "nimsimd >= 1.1.8"
requires "nimsimd >= 1.1.9"
requires "bumpy >= 1.1.1"

task bindings, "Generate bindings":
Expand Down
3 changes: 0 additions & 3 deletions src/pixie/simd/avx2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -296,9 +296,6 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
)
let
oddMask = mm256_set1_epi16(0xff00)
mergedMask = mm256_set_epi32(
0, uint32.high, 0, uint32.high, 0, uint32.high, 0, uint32.high
)
permuteControl = mm256_set_epi32(7, 7, 7, 7, 6, 4, 2, 0)
for y in 0 ..< resultEvenHeight:
let
Expand Down
139 changes: 139 additions & 0 deletions src/pixie/simd/neon.nim
Original file line number Diff line number Diff line change
Expand Up @@ -261,5 +261,144 @@ proc ceilNeon*(image: Image) {.simd.} =
rgbx.a = if rgbx.a == 0: 0 else: 255
image.data[i] = rgbx

proc minifyBy2Neon*(image: Image, power = 1): Image {.simd.} =
## Scales the image down by an integer scale.
if power < 0:
raise newException(PixieError, "Cannot minifyBy2 with negative power")
if power == 0:
return image.copy()

var src = image
for _ in 1 .. power:
# When minifying an image of odd size, round the result image size up
# so a 99 x 99 src image returns a 50 x 50 image.
let
srcWidthIsOdd = (src.width mod 2) != 0
srcHeightIsOdd = (src.height mod 2) != 0
resultEvenWidth = src.width div 2
resultEvenHeight = src.height div 2
result = newImage(
if srcWidthIsOdd: resultEvenWidth + 1 else: resultEvenWidth,
if srcHeightIsOdd: resultEvenHeight + 1 else: resultEvenHeight
)
let
evenLanes = [0.uint8, 2, 4, 6, 255, 255, 255, 255]
tblIdx = vld1_u8(evenLanes.unsafeAddr)
for y in 0 ..< resultEvenHeight:
let
topRowStart = src.dataIndex(0, y * 2)
bottomRowStart = src.dataIndex(0, y * 2 + 1)

var x: int
while x <= resultEvenWidth - 9:
let
top = vld4_u8(src.data[topRowStart + x * 2].addr)
topNext = vld4_u8(src.data[topRowStart + x * 2 + 1].addr)
bottom = vld4_u8(src.data[bottomRowStart + x * 2].addr)
bottomNext = vld4_u8(src.data[bottomRowStart + x * 2 + 1].addr)
r = vrshrn_n_u16(vaddq_u16(
vaddl_u8(top.val[0], topNext.val[0]),
vaddl_u8(bottom.val[0], bottomNext.val[0])
), 2)
g = vrshrn_n_u16(vaddq_u16(
vaddl_u8(top.val[1], topNext.val[1]),
vaddl_u8(bottom.val[1], bottomNext.val[1])
), 2)
b = vrshrn_n_u16(vaddq_u16(
vaddl_u8(top.val[2], topNext.val[2]),
vaddl_u8(bottom.val[2], bottomNext.val[2])
), 2)
a = vrshrn_n_u16(vaddq_u16(
vaddl_u8(top.val[3], topNext.val[3]),
vaddl_u8(bottom.val[3], bottomNext.val[3])
), 2)
# The correct values are in the even lanes 0, 2, 4, 6
var correct: uint8x8x4
correct.val[0] = vtbl1_u8(r, tblIdx)
correct.val[1] = vtbl1_u8(g, tblIdx)
correct.val[2] = vtbl1_u8(b, tblIdx)
correct.val[3] = vtbl1_u8(a, tblIdx)
vst4_u8(result.data[result.dataIndex(x, y)].addr, correct)
x += 4

for x in x ..< resultEvenWidth:
let
a = src.data[topRowStart + x * 2]
b = src.data[topRowStart + x * 2 + 1]
c = src.data[bottomRowStart + x * 2 + 1]
d = src.data[bottomRowStart + x * 2]
mixed = rgbx(
((a.r.uint32 + b.r + c.r + d.r) div 4).uint8,
((a.g.uint32 + b.g + c.g + d.g) div 4).uint8,
((a.b.uint32 + b.b + c.b + d.b) div 4).uint8,
((a.a.uint32 + b.a + c.a + d.a) div 4).uint8
)
result.data[result.dataIndex(x, y)] = mixed

if srcWidthIsOdd:
let rgbx = mix(
src.data[src.dataIndex(src.width - 1, y * 2 + 0)],
src.data[src.dataIndex(src.width - 1, y * 2 + 1)],
0.5
) * 0.5
result.data[result.dataIndex(result.width - 1, y)] = rgbx

if srcHeightIsOdd:
for x in 0 ..< resultEvenWidth:
let rgbx = mix(
src.data[src.dataIndex(x * 2 + 0, src.height - 1)],
src.data[src.dataIndex(x * 2 + 1, src.height - 1)],
0.5
) * 0.5
result.data[result.dataIndex(x, result.height - 1)] = rgbx

if srcWidthIsOdd:
result.data[result.dataIndex(result.width - 1, result.height - 1)] =
src.data[src.dataIndex(src.width - 1, src.height - 1)] * 0.25

# Set src as this result for if we do another power
src = result

proc magnifyBy2Neon*(image: Image, power = 1): Image {.simd.} =
## Scales image up by 2 ^ power.
if power < 0:
raise newException(PixieError, "Cannot magnifyBy2 with negative power")

let scale = 2 ^ power
result = newImage(image.width * scale, image.height * scale)

for y in 0 ..< image.height:
# Write one row of pixels duplicated by scale
let
sourceRowStart = image.dataIndex(0, y)
resultRowStart = result.dataIndex(0, y * scale)
var x: int
if scale == 2:
template duplicate(vec: uint8x8): uint8x16 =
let duplicated = vzip_u8(vec, vec)
vcombine_u8(duplicated.val[0], duplicated.val[1])
while x <= image.width - 8:
let values = vld4_u8(image.data[sourceRowStart + x].addr)
var duplicated: uint8x16x4
duplicated.val[0] = duplicate(values.val[0])
duplicated.val[1] = duplicate(values.val[1])
duplicated.val[2] = duplicate(values.val[2])
duplicated.val[3] = duplicate(values.val[3])
vst4q_u8(result.data[resultRowStart + x * scale].addr, duplicated)
x += 8
for x in x ..< image.width:
let
rgbx = image.data[sourceRowStart + x]
resultIdx = resultRowStart + x * scale
for i in 0 ..< scale:
result.data[resultIdx + i] = rgbx
# Copy that row of pixels into (scale - 1) more rows
for i in 1 ..< scale:
copyMem(
result.data[resultRowStart + result.width * i].addr,
result.data[resultRowStart].addr,
result.width * 4
)

when defined(release):
{.pop.}
4 changes: 1 addition & 3 deletions src/pixie/simd/sse2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -350,9 +350,7 @@ proc minifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
if srcWidthIsOdd: resultEvenWidth + 1 else: resultEvenWidth,
if srcHeightIsOdd: resultEvenHeight + 1 else: resultEvenHeight
)
let
oddMask = mm_set1_epi16(0xff00)
mergedMask = mm_set_epi32(0, uint32.high, 0, uint32.high)
let oddMask = mm_set1_epi16(0xff00)
for y in 0 ..< resultEvenHeight:
let
topRowStart = src.dataIndex(0, y * 2)
Expand Down