Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
File renamed without changes.
6 changes: 3 additions & 3 deletions pixie.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ srcDir = "src"

requires "nim >= 1.4.8"
requires "vmath >= 1.1.4"
requires "chroma >= 0.2.5"
requires "zippy >= 0.10.2"
requires "chroma >= 0.2.6"
requires "zippy >= 0.10.3"
requires "flatty >= 0.3.4"
requires "nimsimd >= 1.1.6"
requires "nimsimd >= 1.1.7"
requires "bumpy >= 1.1.1"

task bindings, "Generate bindings":
Expand Down
9 changes: 0 additions & 9 deletions src/pixie/blends.nim
Original file line number Diff line number Diff line change
Expand Up @@ -553,16 +553,11 @@ when defined(amd64) and allowSimd:
var
backdropEven = mm_slli_epi16(backdrop, 8)
backdropOdd = mm_and_si128(backdrop, oddMask)

# backdrop * k
backdropEven = mm_mulhi_epu16(backdropEven, evenK)
backdropOdd = mm_mulhi_epu16(backdropOdd, oddK)

# div 255
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)

# Shift from high to low bits
sourceEven = mm_srli_epi16(sourceEven, 8)
sourceOdd = mm_srli_epi16(sourceOdd, 8)

Expand All @@ -582,12 +577,8 @@ when defined(amd64) and allowSimd:
var
backdropEven = mm_slli_epi16(backdrop, 8)
backdropOdd = mm_and_si128(backdrop, oddMask)

# backdrop * source
backdropEven = mm_mulhi_epu16(backdropEven, sourceEven)
backdropOdd = mm_mulhi_epu16(backdropOdd, sourceOdd)

# div 255
backdropEven = mm_srli_epi16(mm_mulhi_epu16(backdropEven, div255), 7)
backdropOdd = mm_srli_epi16(mm_mulhi_epu16(backdropOdd, div255), 7)

Expand Down
6 changes: 3 additions & 3 deletions src/pixie/internal.nim
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ proc toPremultipliedAlpha*(
for i in 0 ..< data.len:
var c = data[i]
if c.a != 255:
c.r = ((c.r.uint32 * c.a) div 255).uint8
c.g = ((c.g.uint32 * c.a) div 255).uint8
c.b = ((c.b.uint32 * c.a) div 255).uint8
c.r = ((c.r.uint32 * c.a + 127) div 255).uint8
c.g = ((c.g.uint32 * c.a + 127) div 255).uint8
c.b = ((c.b.uint32 * c.a + 127) div 255).uint8
data[i] = c

proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool {.hasSimd.} =
Expand Down
25 changes: 15 additions & 10 deletions src/pixie/simd/avx2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
let
alphaMask = mm256_set1_epi32(cast[int32](0xff000000))
oddMask = mm256_set1_epi16(0xff00)
div255 = mm256_set1_epi16(0x8081)
vec128 = mm256_set1_epi16(128)
hiMask = mm256_set1_epi16(255 shl 8)
iterations = data.len div 8
for _ in 0 ..< iterations:
let
Expand All @@ -112,20 +113,24 @@ proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
colorsOdd = mm256_and_si256(values, oddMask)
colorsEven = mm256_mulhi_epu16(colorsEven, evenMultiplier)
colorsOdd = mm256_mulhi_epu16(colorsOdd, oddMultiplier)
colorsEven = mm256_srli_epi16(mm256_mulhi_epu16(colorsEven, div255), 7)
colorsOdd = mm256_srli_epi16(mm256_mulhi_epu16(colorsOdd, div255), 7)
mm256_storeu_si256(
data[i].addr,
mm256_or_si256(colorsEven, mm256_slli_epi16(colorsOdd, 8))
)
let
tmpEven = mm256_add_epi16(colorsEven, vec128)
tmpOdd = mm256_add_epi16(colorsOdd, vec128)
colorsEven = mm256_srli_epi16(tmpEven, 8)
colorsOdd = mm256_srli_epi16(tmpOdd, 8)
colorsEven = mm256_add_epi16(colorsEven, tmpEven)
colorsOdd = mm256_add_epi16(colorsOdd, tmpOdd)
colorsEven = mm256_srli_epi16(colorsEven, 8)
colorsOdd = mm256_and_si256(colorsOdd, hiMask)
mm256_storeu_si256(data[i].addr, mm256_or_si256(colorsEven, colorsOdd))
i += 8

for i in i ..< data.len:
var c = data[i]
if c.a != 255:
c.r = ((c.r.uint32 * c.a) div 255).uint8
c.g = ((c.g.uint32 * c.a) div 255).uint8
c.b = ((c.b.uint32 * c.a) div 255).uint8
c.r = ((c.r.uint32 * c.a + 127) div 255).uint8
c.g = ((c.g.uint32 * c.a + 127) div 255).uint8
c.b = ((c.b.uint32 * c.a + 127) div 255).uint8
data[i] = c

when defined(release):
Expand Down
37 changes: 37 additions & 0 deletions src/pixie/simd/neon.nim
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,43 @@ proc isOpaqueNeon*(data: var seq[ColorRGBX], start, len: int): bool {.simd.} =
if data[i].a != 255:
return false

proc toPremultipliedAlphaNeon*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
  ## Multiplies the r, g and b channels of every entry in `data` by that
  ## entry's alpha, in place. The alpha channel itself is left untouched.
  ##
  ## Eight pixels are handled per NEON iteration. Pixels located before the
  ## first 16-byte-aligned address, and any left-over tail, go through the
  ## scalar `(c * a + 127) div 255` path instead.
  if data.len == 0:
    # Nothing to convert, and taking `data[0].addr` below would index past
    # the end of an empty seq.
    return

  var
    i: int
    p = cast[uint](data[0].addr)
  # Align to 16 bytes
  while i < data.len and (p and 15) != 0:
    var c = data[i]
    if c.a != 255:
      c.r = ((c.r.uint32 * c.a + 127) div 255).uint8
      c.g = ((c.g.uint32 * c.a + 127) div 255).uint8
      c.b = ((c.b.uint32 * c.a + 127) div 255).uint8
      data[i] = c
    inc i
    p += 4 # One 4-byte pixel per step

  proc premultiply(c, a: uint8x8): uint8x8 {.inline.} =
    # Widening 8x8 -> 16 bit multiply, then a rounding shift right by 8 and
    # a rounding add that keeps only the high byte of each lane, i.e.
    # (ca + ((ca + 128) shr 8) + 128) shr 8 per the Arm intrinsics reference.
    # NOTE(review): expected to agree with the scalar rounding used in the
    # head and tail loops - confirm against the exhaustive test.
    let ca = vmull_u8(c, a)
    vraddhn_u16(ca, vrshrq_n_u16(ca, 8))

  let iterations = (data.len - i) div 8
  for _ in 0 ..< iterations:
    # vld4_u8 de-interleaves 8 pixels (32 bytes) into four channel vectors;
    # val[3] is the alpha channel and is stored back unchanged.
    var channels = vld4_u8(cast[pointer](p))
    channels.val[0] = premultiply(channels.val[0], channels.val[3])
    channels.val[1] = premultiply(channels.val[1], channels.val[3])
    channels.val[2] = premultiply(channels.val[2], channels.val[3])
    vst4_u8(cast[pointer](p), channels)
    p += 32
    i += 8

  # Scalar tail for the pixels that did not fill a whole 8-pixel batch
  for i in i ..< data.len:
    var c = data[i]
    if c.a != 255:
      c.r = ((c.r.uint32 * c.a + 127) div 255).uint8
      c.g = ((c.g.uint32 * c.a + 127) div 255).uint8
      c.b = ((c.b.uint32 * c.a + 127) div 255).uint8
      data[i] = c

proc newImageNeon*(mask: Mask): Image {.simd.} =
result = newImage(mask.width, mask.height)

Expand Down
25 changes: 15 additions & 10 deletions src/pixie/simd/sse2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ proc toPremultipliedAlphaSse2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
let
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
oddMask = mm_set1_epi16(0xff00)
div255 = mm_set1_epi16(0x8081)
vec128 = mm_set1_epi16(128)
hiMask = mm_set1_epi16(255 shl 8)
iterations = data.len div 4
for _ in 0 ..< iterations:
let
Expand All @@ -186,20 +187,24 @@ proc toPremultipliedAlphaSse2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
colorsOdd = mm_and_si128(values, oddMask)
colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier)
colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier)
colorsEven = mm_srli_epi16(mm_mulhi_epu16(colorsEven, div255), 7)
colorsOdd = mm_srli_epi16(mm_mulhi_epu16(colorsOdd, div255), 7)
mm_storeu_si128(
data[i].addr,
mm_or_si128(colorsEven, mm_slli_epi16(colorsOdd, 8))
)
let
tmpEven = mm_add_epi16(colorsEven, vec128)
tmpOdd = mm_add_epi16(colorsOdd, vec128)
colorsEven = mm_srli_epi16(tmpEven, 8)
colorsOdd = mm_srli_epi16(tmpOdd, 8)
colorsEven = mm_add_epi16(colorsEven, tmpEven)
colorsOdd = mm_add_epi16(colorsOdd, tmpOdd)
colorsEven = mm_srli_epi16(colorsEven, 8)
colorsOdd = mm_and_si128(colorsOdd, hiMask)
mm_storeu_si128(data[i].addr, mm_or_si128(colorsEven, colorsOdd))
i += 4

for i in i ..< data.len:
var c = data[i]
if c.a != 255:
c.r = ((c.r.uint32 * c.a) div 255).uint8
c.g = ((c.g.uint32 * c.a) div 255).uint8
c.b = ((c.b.uint32 * c.a) div 255).uint8
c.r = ((c.r.uint32 * c.a + 127) div 255).uint8
c.g = ((c.g.uint32 * c.a + 127) div 255).uint8
c.b = ((c.b.uint32 * c.a + 127) div 255).uint8
data[i] = c

proc newImageSse2*(mask: Mask): Image {.simd.} =
Expand Down
28 changes: 27 additions & 1 deletion tests/test_images.nim
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ block:
let a = newImage(100, 100)
a.fill(rgbx(50, 100, 150, 200))
a.invert()
doAssert a[0, 0] == rgbx(44, 33, 22, 55)
doAssert a[0, 0] == rgbx(44, 33, 23, 55)

block:
let ctx = newContext(100, 100)
Expand Down Expand Up @@ -226,3 +226,29 @@ block:
292.0, 45.0, 1.0
)
)

block:
  # Exhaustively compares toPremultipliedAlpha against a float reference for
  # every (alpha, red) byte pair.
  var
    colors: seq[ColorRGBA]
    premultiplied: seq[ColorRGBX]
  for a in 0.uint8 .. 255:
    for r in 0.uint8 .. 255:
      let
        straight = rgba(r, 0, 0, a)
        asFloats = straight.color()
        scaled = color(asFloats.r * asFloats.a, 0, 0, asFloats.a)
        # Reference value: scale in float space, then round back to bytes
        expected = rgbx(
          round(scaled.r * 255).uint8,
          0,
          0,
          round(scaled.a * 255).uint8
        )
      colors.add(straight)
      premultiplied.add(expected)

  var converted = cast[seq[ColorRGBX]](colors)
  toPremultipliedAlpha(converted)

  for i, expected in premultiplied:
    # The in-place conversion must match the float reference...
    doAssert expected == converted[i]
    # ...and the per-colour rgbx conversion must match it as well
    doAssert colors[i].rgbx == converted[i]
2 changes: 1 addition & 1 deletion tests/test_images_draw.nim
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ block:
image.draw(strokeImage)

image.xray("tests/images/fillOptimization.png")
doAssert image[10, 10] == rgbx(255, 127, 63, 255)
doAssert image[10, 10] == rgbx(255, 128, 64, 255)

block:
let a = newImage(100, 100)
Expand Down