Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 6 additions & 27 deletions src/pixie/images.nim
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,7 @@ proc newImage*(mask: Mask): Image {.raises: [PixieError].} =
result = newImage(mask.width, mask.height)

when allowSimd and compiles(newImageFromMaskSimd):
newImageFromMaskSimd(
cast[ptr UncheckedArray[ColorRGBX]](result.data[0].addr),
cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
mask.data.len
)
newImageFromMaskSimd(result.data, mask.data)
return

for i in 0 ..< mask.data.len:
Expand Down Expand Up @@ -102,10 +98,7 @@ proc fill*(image: Image, color: SomeColor) {.inline, raises: [].} =
proc isOneColor*(image: Image): bool {.raises: [].} =
## Checks if the entire image is the same color.
when allowSimd and compiles(isOneColorSimd):
return isOneColorSimd(
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
image.data.len
)
return isOneColorSimd(image.data)

result = true

Expand All @@ -117,10 +110,7 @@ proc isOneColor*(image: Image): bool {.raises: [].} =
proc isTransparent*(image: Image): bool {.raises: [].} =
## Checks if this image is fully transparent or not.
when allowSimd and compiles(isTransparentSimd):
return isTransparentSimd(
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
image.data.len
)
return isTransparentSimd(image.data)

result = true

Expand Down Expand Up @@ -368,11 +358,7 @@ proc applyOpacity*(image: Image, opacity: float32) {.raises: [].} =
return

when allowSimd and compiles(applyOpacitySimd):
applyOpacitySimd(
cast[ptr UncheckedArray[uint8]](image.data[0].addr),
image.data.len * 4,
opacity
)
applyOpacitySimd(image.data, opacity)
return

for i in 0 ..< image.data.len:
Expand All @@ -386,10 +372,7 @@ proc applyOpacity*(image: Image, opacity: float32) {.raises: [].} =
proc invert*(image: Image) {.raises: [].} =
## Inverts all of the colors and alpha.
when allowSimd and compiles(invertImageSimd):
invertImageSimd(
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
image.data.len
)
invertImageSimd(image.data)
return

for i in 0 ..< image.data.len:
Expand Down Expand Up @@ -471,11 +454,7 @@ proc newMask*(image: Image): Mask {.raises: [PixieError].} =
result = newMask(image.width, image.height)

when allowSimd and compiles(newMaskFromImageSimd):
newMaskFromImageSimd(
cast[ptr UncheckedArray[uint8]](result.data[0].addr),
cast[ptr UncheckedArray[ColorRGBX]](image.data[0].addr),
image.data.len
)
newMaskFromImageSimd(result.data, image.data)
return

for i in 0 ..< image.data.len:
Expand Down
20 changes: 5 additions & 15 deletions src/pixie/internal.nim
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,12 @@ proc fillUnsafe*(
) {.raises: [].} =
## Fills the image data with the color starting at index start and
## continuing for len indices.
let rgbx = color.asRgbx()

when allowSimd and compiles(fillUnsafeSimd):
fillUnsafeSimd(
cast[ptr UncheckedArray[ColorRGBX]](data[start].addr),
len,
rgbx
)
fillUnsafeSimd(data, start, len, color)
return

let rgbx = color.asRgbx()

# Use memset when every byte has the same value
if rgbx.r == rgbx.g and rgbx.r == rgbx.b and rgbx.r == rgbx.a:
nimSetMem(data[start].addr, rgbx.r.cint, len * 4)
Expand Down Expand Up @@ -117,10 +113,7 @@ proc toStraightAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].} =
## Converts an image to premultiplied alpha from straight alpha.
when allowSimd and compiles(toPremultipliedAlphaSimd):
toPremultipliedAlphaSimd(
cast[ptr UncheckedArray[uint32]](data[0].addr),
data.len
)
toPremultipliedAlphaSimd(data)
return

for i in 0 ..< data.len:
Expand All @@ -133,10 +126,7 @@ proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].}

proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool =
when allowSimd and compiles(isOpaqueSimd):
return isOpaqueSimd(
cast[ptr UncheckedArray[ColorRGBX]](data[start].addr),
len
)
return isOpaqueSimd(data, start, len)

result = true

Expand Down
25 changes: 9 additions & 16 deletions src/pixie/masks.nim
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import common, internal, vmath

when defined(amd64) and allowSimd:
import nimsimd/sse2
when allowSimd:
import simd

when defined(amd64):
import nimsimd/sse2

type
Mask* = ref object
Expand Down Expand Up @@ -194,11 +197,7 @@ proc applyOpacity*(mask: Mask, opacity: float32) {.raises: [].} =
return

when allowSimd and compiles(applyOpacitySimd):
applyOpacitySimd(
cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
mask.data.len,
opacity
)
applyOpacitySimd(mask.data, opacity)
return

for i in 0 ..< mask.data.len:
Expand Down Expand Up @@ -234,11 +233,8 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =

proc invert*(mask: Mask) {.raises: [].} =
## Inverts all of the values - creates a negative of the mask.
when allowSimd and compiles(invertImageSimd):
invertMaskSimd(
cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
mask.data.len
)
when allowSimd and compiles(invertMaskSimd):
invertMaskSimd(mask.data)
return

for i in 0 ..< mask.data.len:
Expand Down Expand Up @@ -308,10 +304,7 @@ proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
proc ceil*(mask: Mask) {.raises: [].} =
## A value of 0 stays 0. Anything else turns into 255.
when allowSimd and compiles(invertImageSimd):
ceilMaskSimd(
cast[ptr UncheckedArray[uint8]](mask.data[0].addr),
mask.data.len
)
ceilMaskSimd(mask.data)
return

for i in 0 ..< mask.data.len:
Expand Down
27 changes: 17 additions & 10 deletions src/pixie/runtimechecked/avx.nim
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,30 @@ when defined(release):
{.push checks: off.}

proc fillUnsafeAvx*(
data: ptr UncheckedArray[ColorRGBX],
len: int,
rgbx: ColorRGBX
data: var seq[ColorRGBX],
start, len: int,
color: SomeColor
) =
var i: int
while i < len and (cast[uint](data[i].addr) and 31) != 0: # Align to 32 bytes
let rgbx = color.asRgbx()

var
i = start
p = cast[uint](data[i].addr)
# Align to 32 bytes
while i < (start + len) and (p and 31) != 0:
data[i] = rgbx
inc i
p += 4

let
iterations = (len - i) div 8
colorVec = mm256_set1_epi32(cast[int32](rgbx))
iterations = (start + len - i) div 8
for _ in 0 ..< iterations:
mm256_store_si256(data[i].addr, colorVec)
i += 8
# Fill whatever is left the slow way
for i in i ..< len:
mm256_store_si256(cast[pointer](p), colorVec)
p += 32
i += 8 * iterations

for i in i ..< start + len:
data[i] = rgbx

when defined(release):
Expand Down
57 changes: 34 additions & 23 deletions src/pixie/runtimechecked/avx2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,21 @@ when defined(gcc) or defined(clang):
when defined(release):
{.push checks: off.}

proc isOneColorAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
proc isOneColorAvx2*(data: var seq[ColorRGBX]): bool =
result = true

let color = data[0]

var i: int
while i < len and (cast[uint](data[i].addr) and 31) != 0: # Align to 32 bytes
# Align to 32 bytes
while i < data.len and (cast[uint](data[i].addr) and 31) != 0:
if data[i] != color:
return false
inc i

let
colorVec = mm256_set1_epi32(cast[int32](color))
iterations = (len - i) div 16
iterations = (data.len - i) div 16
for _ in 0 ..< iterations:
let
values0 = mm256_load_si256(data[i].addr)
Expand All @@ -31,22 +32,23 @@ proc isOneColorAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
return false
i += 16

for i in i ..< len:
for i in i ..< data.len:
if data[i] != color:
return false

proc isTransparentAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
proc isTransparentAvx2*(data: var seq[ColorRGBX]): bool =
result = true

var i: int
while i < len and (cast[uint](data[i].addr) and 31) != 0: # Align to 32 bytes
# Align to 32 bytes
while i < data.len and (cast[uint](data[i].addr) and 31) != 0:
if data[i].a != 0:
return false
inc i

let
vecZero = mm256_setzero_si256()
iterations = (len - i) div 16
iterations = (data.len - i) div 16
for _ in 0 ..< iterations:
let
values0 = mm256_load_si256(data[i].addr)
Expand All @@ -57,22 +59,23 @@ proc isTransparentAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
return false
i += 16

for i in i ..< len:
for i in i ..< data.len:
if data[i].a != 0:
return false

proc isOpaqueAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
proc isOpaqueAvx2*(data: var seq[ColorRGBX], start, len: int): bool =
result = true

var i: int
while i < len and (cast[uint](data[i].addr) and 31) != 0: # Align to 32 bytes
var i = start
# Align to 32 bytes
while i < (start + len) and (cast[uint](data[i].addr) and 31) != 0:
if data[i].a != 255:
return false
inc i

let
vec255 = mm256_set1_epi8(255)
iterations = (len - i) div 16
iterations = (start + len - i) div 16
for _ in 0 ..< iterations:
let
values0 = mm256_load_si256(data[i].addr)
Expand All @@ -83,21 +86,21 @@ proc isOpaqueAvx2*(data: ptr UncheckedArray[ColorRGBX], len: int): bool =
return false
i += 16

for i in i ..< len:
for i in i ..< start + len:
if data[i].a != 255:
return false

proc toPremultipliedAlphaAvx2*(
data: ptr UncheckedArray[uint32],
len: int
): int =
proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) =
var i: int

let
alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
oddMask = mm256_set1_epi16(cast[int16](0xff00))
div255 = mm256_set1_epi16(cast[int16](0x8081))
for _ in 0 ..< len div 8:
oddMask = mm256_set1_epi16(0xff00)
div255 = mm256_set1_epi16(0x8081)
iterations = data.len div 8
for _ in 0 ..< iterations:
let
values = mm256_loadu_si256(data[result].addr)
values = mm256_loadu_si256(data[i].addr)
alpha = mm256_and_si256(values, alphaMask)
eq = mm256_cmpeq_epi8(values, alphaMask)
if (mm256_movemask_epi8(eq) and 0x88888888) != 0x88888888:
Expand All @@ -112,10 +115,18 @@ proc toPremultipliedAlphaAvx2*(
colorsEven = mm256_srli_epi16(mm256_mulhi_epu16(colorsEven, div255), 7)
colorsOdd = mm256_srli_epi16(mm256_mulhi_epu16(colorsOdd, div255), 7)
mm256_storeu_si256(
data[result].addr,
data[i].addr,
mm256_or_si256(colorsEven, mm256_slli_epi16(colorsOdd, 8))
)
result += 8
i += 8

for i in i ..< data.len:
var c = data[i]
if c.a != 255:
c.r = ((c.r.uint32 * c.a) div 255).uint8
c.g = ((c.g.uint32 * c.a) div 255).uint8
c.b = ((c.b.uint32 * c.a) div 255).uint8
data[i] = c

when defined(release):
{.pop.}
Loading