Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions src/pixie/images.nim
Original file line number Diff line number Diff line change
Expand Up @@ -77,23 +77,25 @@ proc isOpaque*(image: Image): bool {.raises: [].} =

proc flipHorizontal*(image: Image) {.raises: [].} =
  ## Flips the image around the Y axis.
  ##
  ## Each row of `image.data` is mirrored in place by swapping elements
  ## pairwise, starting at the two ends of the row and moving inwards.
  ## `image.width div 2` swaps are done per row, so for an odd width the
  ## middle element is left untouched.
  let halfWidth = image.width div 2
  for y in 0 ..< image.height:
    # Two cursors replace a `dataIndex` call per element. This relies on a
    # row occupying `image.width` consecutive slots of `image.data`
    # starting at `dataIndex(0, y)` -- NOTE(review): confirm against
    # `dataIndex`, which is defined outside this view.
    var
      left = image.dataIndex(0, y)
      right = left + image.width - 1
    for _ in 0 ..< halfWidth:
      swap(image.data[left], image.data[right])
      inc left
      dec right

proc flipVertical*(image: Image) {.raises: [].} =
  ## Flips the image around the X axis.
  ##
  ## Row `y` is exchanged element by element with row
  ## `image.height - y - 1`, for the first `image.height div 2` rows. For an
  ## odd height the middle row is left untouched.
  let halfHeight = image.height div 2
  for y in 0 ..< halfHeight:
    # Look up the start of both rows once per row pair rather than calling
    # `dataIndex` for every element. Adding `x` to a row start assumes the
    # row is stored contiguously -- NOTE(review): confirm against
    # `dataIndex`, which is defined outside this view.
    let
      topStart = image.dataIndex(0, y)
      bottomStart = image.dataIndex(0, image.height - y - 1)
    for x in 0 ..< image.width:
      swap(image.data[topStart + x], image.data[bottomStart + x])

proc rotate90*(image: Image) {.raises: [PixieError].} =
## Rotates the image 90 degrees clockwise.
Expand Down
13 changes: 6 additions & 7 deletions src/pixie/simd/avx2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
oddMask = mm256_set1_epi16(0xff00)
vec128 = mm256_set1_epi16(128)
hiMask = mm256_set1_epi16(255 shl 8)
iterations = data.len div 8
iterations = (data.len - i) div 8
for _ in 0 ..< iterations:
let
values = mm256_load_si256(cast[pointer](p))
Expand Down Expand Up @@ -163,7 +163,7 @@ proc invertAvx2*(image: Image) {.simd.} =

let
vec255 = mm256_set1_epi8(255)
iterations = image.data.len div 16
iterations = (image.data.len - i) div 16
for _ in 0 ..< iterations:
let
a = mm256_load_si256(cast[pointer](p))
Expand Down Expand Up @@ -211,7 +211,7 @@ proc applyOpacityAvx2*(image: Image, opacity: float32) {.simd.} =
div255 = mm256_set1_epi16(0x8081)
zeroVec = mm256_setzero_si256()
opacityVec = mm256_slli_epi16(mm256_set1_epi16(opacity), 8)
iterations = image.data.len div 8
iterations = (image.data.len - i) div 8
for _ in 0 ..< iterations:
let
values = mm256_load_si256(cast[pointer](p))
Expand Down Expand Up @@ -257,7 +257,7 @@ proc ceilAvx2*(image: Image) {.simd.} =
let
vecZero = mm256_setzero_si256()
vec255 = mm256_set1_epi8(255)
iterations = image.data.len div 8
iterations = (image.data.len - i) div 8
for _ in 0 ..< iterations:
var values = mm256_load_si256(cast[pointer](p))
values = mm256_cmpeq_epi8(values, vecZero)
Expand Down Expand Up @@ -330,9 +330,8 @@ proc minifyBy2Avx2*(image: Image, power = 1): Image {.simd.} =
addedOddDiv4 = mm256_srli_epi16(addedOdd, 2)
merged = mm256_or_si256(addedEvenDiv4, mm256_slli_epi16(addedOddDiv4, 8))
# Merged has the correct values for the next two pixels at
# index 0, 2, 4, 6 so mask the others out and permute into position
masked = mm256_and_si256(merged, mergedMask)
permuted = mm_256_permutevar8x32_epi32(masked, permuteControl)
# index 0, 2, 4, 6 so permute into position and store
permuted = mm_256_permutevar8x32_epi32(merged, permuteControl)
mm_storeu_si128(
result.data[result.dataIndex(x, y)].addr,
mm256_castsi256_si128(permuted)
Expand Down
25 changes: 25 additions & 0 deletions src/pixie/simd/neon.nim
Original file line number Diff line number Diff line change
Expand Up @@ -236,5 +236,30 @@ proc applyOpacityNeon*(image: Image, opacity: float32) {.simd.} =
rgbx.a = ((rgbx.a * opacity) div 255).uint8
image.data[i] = rgbx

proc ceilNeon*(image: Image) {.simd.} =
  ## Rewrites `image.data` in place so that every non-zero channel byte
  ## becomes 255 while zero bytes stay 0.
  ##
  ## The bulk of the buffer is handled 16 bytes at a time (counted here as
  ## 4 elements of `image.data` per step); whatever is left over is handled
  ## one element at a time.
  var cursor = cast[uint](image.data[0].addr)

  let
    vectorCount = image.data.len div 4
    allZero = vmovq_n_u8(0)
    allSet = vmovq_n_u8(255)

  for _ in 0 ..< vectorCount:
    # `wasZero` has 0xff in every lane that held 0. Bit-clearing those lanes
    # out of 255 leaves 0 there and 255 everywhere else.
    let wasZero = vceqq_u8(vld1q_u8(cast[pointer](cursor)), allZero)
    vst1q_u8(cast[pointer](cursor), vbicq_u8(allSet, wasZero))
    cursor += 16

  # Scalar pass over the elements the vector loop did not reach.
  for idx in 4 * vectorCount ..< image.data.len:
    var px = image.data[idx]
    if px.r != 0: px.r = 255
    if px.g != 0: px.g = 255
    if px.b != 0: px.b = 255
    if px.a != 0: px.a = 255
    image.data[idx] = px

# Release builds only: pop the most recently pushed pragma state.
# NOTE(review): presumably paired with a `{.push ...}` guarded by the same
# condition earlier in this module; that push is outside this view -- confirm.
when defined(release):
{.pop.}
17 changes: 7 additions & 10 deletions src/pixie/simd/sse2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ proc invertSse2*(image: Image) {.simd.} =

let
vec255 = mm_set1_epi8(255)
iterations = image.data.len div 16
iterations = (image.data.len - i) div 16
for _ in 0 ..< iterations:
let
a = mm_load_si128(cast[pointer](p))
Expand Down Expand Up @@ -264,7 +264,7 @@ proc applyOpacitySse2*(image: Image, opacity: float32) {.simd.} =
div255 = mm_set1_epi16(0x8081)
zeroVec = mm_setzero_si128()
opacityVec = mm_slli_epi16(mm_set1_epi16(opacity), 8)
iterations = image.data.len div 4
iterations = (image.data.len - i) div 4
for _ in 0 ..< iterations:
let values = mm_loadu_si128(cast[pointer](p))
if mm_movemask_epi8(mm_cmpeq_epi16(values, zeroVec)) != 0xffff:
Expand Down Expand Up @@ -308,7 +308,7 @@ proc ceilSse2*(image: Image) {.simd.} =
let
vecZero = mm_setzero_si128()
vec255 = mm_set1_epi8(255)
iterations = image.data.len div 8
iterations = (image.data.len - i) div 8
for _ in 0 ..< iterations:
var
values0 = mm_loadu_si128(cast[pointer](p))
Expand Down Expand Up @@ -383,13 +383,10 @@ proc minifyBy2Sse2*(image: Image, power = 1): Image {.simd.} =
addedOddDiv4 = mm_srli_epi16(addedOdd, 2)
merged = mm_or_si128(addedEvenDiv4, mm_slli_epi16(addedOddDiv4, 8))
# Merged has the correct values for the next two pixels at
# index 0 and 2 so mask the others out and shift 0 and 2 into
# position and store
masked = mm_and_si128(merged, mergedMask)
mm_storeu_si128(
result.data[result.dataIndex(x, y)].addr,
mm_shuffle_epi32(masked, MM_SHUFFLE(3, 3, 2, 0))
)
# index 0 and 2 so shift 0 and 2 into position and store
shuffled = mm_shuffle_epi32(merged, MM_SHUFFLE(3, 3, 2, 0))
lower = mm_cvtsi128_si64(shuffled)
copyMem(result.data[result.dataIndex(x, y)].addr, lower.unsafeAddr, 8)
x += 2

for x in x ..< resultEvenWidth:
Expand Down