Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions experiments/benchmark_cairo.nim
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ block:
surface = imageSurfaceCreate(FORMAT_ARGB32, 900, 900)
ctx = surface.create()

ctx.setLineWidth(1)

timeIt "[cairo] " & benchmark.name:
for fill in benchmark.fills:
if fill.shapes.len > 0:
Expand Down Expand Up @@ -221,6 +223,7 @@ block:
FillRuleEvenOdd
)
ctx.fill()
# ctx.stroke()

# discard surface.writeToPng(("cairo_" & benchmark.name & ".png").cstring)

Expand All @@ -242,5 +245,11 @@ block:
fill.transform,
fill.windingRule
)
# image.strokePath(
# p,
# fill.paint,
# fill.transform,
# 1
# )

# image.writeFile("pixie_" & benchmark.name & ".png")
2 changes: 1 addition & 1 deletion pixie.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ requires "vmath >= 1.1.4"
requires "chroma >= 0.2.5"
requires "zippy >= 0.10.2"
requires "flatty >= 0.3.4"
requires "nimsimd >= 1.1.1"
requires "nimsimd >= 1.1.5"
requires "bumpy >= 1.1.1"

task bindings, "Generate bindings":
Expand Down
2 changes: 1 addition & 1 deletion src/pixie/fileformats/jpeg.nim
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import chroma, flatty/binny, pixie/common, pixie/images, pixie/internal,
pixie/masks, sequtils, std/decls, strutils
pixie/masks, std/decls, std/sequtils, std/strutils

when defined(amd64) and allowSimd:
import nimsimd/sse2
Expand Down
76 changes: 53 additions & 23 deletions src/pixie/images.nim
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import blends, bumpy, chroma, common, masks, pixie/internal, vmath

when defined(amd64) and allowSimd:
import nimsimd/sse2
import nimsimd/sse2, runtimechecked/avx2

const h = 0.5.float32

Expand Down Expand Up @@ -101,54 +101,84 @@ proc fill*(image: Image, color: SomeColor) {.inline, raises: [].} =

proc isOneColor*(image: Image): bool {.raises: [].} =
## Checks if the entire image is the same color.
when defined(amd64) and allowSimd:
if cpuHasAvx2:
return isOneColorAvx2(image.data, 0, image.data.len)

result = true

let color = image.data[0]

var i: int
when defined(amd64) and allowSimd:
let colorVec = mm_set1_epi32(cast[int32](color))
for _ in 0 ..< image.data.len div 16:
# Align to 16 bytes
var p = cast[uint](image.data[i].addr)
while i < image.data.len and (p and 15) != 0:
if image.data[i] != color:
return false
inc i
p += 4

let
colorVec = mm_set1_epi32(cast[int32](color))
iterations = (image.data.len - i) div 16
for _ in 0 ..< iterations:
let
values0 = mm_loadu_si128(image.data[i + 0].addr)
values1 = mm_loadu_si128(image.data[i + 4].addr)
values2 = mm_loadu_si128(image.data[i + 8].addr)
values3 = mm_loadu_si128(image.data[i + 12].addr)
values0 = mm_load_si128(cast[pointer](p))
values1 = mm_load_si128(cast[pointer](p + 16))
values2 = mm_load_si128(cast[pointer](p + 32))
values3 = mm_load_si128(cast[pointer](p + 48))
eq0 = mm_cmpeq_epi8(values0, colorVec)
eq1 = mm_cmpeq_epi8(values1, colorVec)
eq2 = mm_cmpeq_epi8(values2, colorVec)
eq3 = mm_cmpeq_epi8(values3, colorVec)
eq = mm_and_si128(mm_and_si128(eq0, eq1), mm_and_si128(eq2, eq3))
if mm_movemask_epi8(eq) != 0xffff:
eq0123 = mm_and_si128(mm_and_si128(eq0, eq1), mm_and_si128(eq2, eq3))
if mm_movemask_epi8(eq0123) != 0xffff:
return false
i += 16
p += 64
i += 16 * iterations

for j in i ..< image.data.len:
if image.data[j] != color:
for i in i ..< image.data.len:
if image.data[i] != color:
return false

proc isTransparent*(image: Image): bool {.raises: [].} =
## Checks if this image is fully transparent or not.
when defined(amd64) and allowSimd:
if cpuHasAvx2:
return isTransparentAvx2(image.data, 0, image.data.len)

result = true

var i: int
when defined(amd64) and allowSimd:
let vecZero = mm_setzero_si128()
for _ in 0 ..< image.data.len div 16:
# Align to 16 bytes
var p = cast[uint](image.data[i].addr)
while i < image.data.len and (p and 15) != 0:
if image.data[i].a != 0:
return false
inc i
p += 4

let
vecZero = mm_setzero_si128()
iterations = (image.data.len - i) div 16
for _ in 0 ..< iterations:
let
values0 = mm_loadu_si128(image.data[i + 0].addr)
values1 = mm_loadu_si128(image.data[i + 4].addr)
values2 = mm_loadu_si128(image.data[i + 8].addr)
values3 = mm_loadu_si128(image.data[i + 12].addr)
values0 = mm_load_si128(cast[pointer](p))
values1 = mm_load_si128(cast[pointer](p + 16))
values2 = mm_load_si128(cast[pointer](p + 32))
values3 = mm_load_si128(cast[pointer](p + 48))
values01 = mm_or_si128(values0, values1)
values23 = mm_or_si128(values2, values3)
values = mm_or_si128(values01, values23)
if mm_movemask_epi8(mm_cmpeq_epi8(values, vecZero)) != 0xffff:
values0123 = mm_or_si128(values01, values23)
if mm_movemask_epi8(mm_cmpeq_epi8(values0123, vecZero)) != 0xffff:
return false
i += 16
p += 64
i += 16 * iterations

for j in i ..< image.data.len:
if image.data[j].a != 0:
for i in i ..< image.data.len:
if image.data[i].a != 0:
return false

proc isOpaque*(image: Image): bool {.raises: [].} =
Expand Down
109 changes: 64 additions & 45 deletions src/pixie/internal.nim
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ import bumpy, chroma, common, system/memory, vmath
const allowSimd* = not defined(pixieNoSimd) and not defined(tcc)

when defined(amd64) and allowSimd:
import nimsimd/runtimecheck, nimsimd/sse2, simd/avx
let cpuHasAvx* = checkInstructionSets({AVX})
import nimsimd/runtimecheck, nimsimd/sse2, runtimechecked/avx, runtimechecked/avx2
let
cpuHasAvx* = checkInstructionSets({AVX})
cpuHasAvx2* = checkInstructionSets({AVX, AVX2})

template currentExceptionAsPixieError*(): untyped =
## Gets the current exception and returns it as a PixieError with stack trace.
Expand Down Expand Up @@ -141,70 +143,87 @@ proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].}
## Converts an image to premultiplied alpha from straight alpha.
var i: int
when defined(amd64) and allowSimd:
# When supported, SIMD convert as much as possible
let
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081))
for _ in 0 ..< data.len div 4:
if cpuHasAvx2:
i = toPremultipliedAlphaAvx2(data)
else:
let
values = mm_loadu_si128(data[i].addr)
alpha = mm_and_si128(values, alphaMask)
eq = mm_cmpeq_epi8(values, alphaMask)
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
alphaMask = mm_set1_epi32(cast[int32](0xff000000))
oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081))
for _ in 0 ..< data.len div 4:
let
evenMultiplier = mm_or_si128(alpha, mm_srli_epi32(alpha, 16))
oddMultiplier = mm_or_si128(evenMultiplier, alphaMask)
var
colorsEven = mm_slli_epi16(values, 8)
colorsOdd = mm_and_si128(values, oddMask)
colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier)
colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier)
colorsEven = mm_srli_epi16(mm_mulhi_epu16(colorsEven, div255), 7)
colorsOdd = mm_srli_epi16(mm_mulhi_epu16(colorsOdd, div255), 7)
mm_storeu_si128(
data[i].addr,
mm_or_si128(colorsEven, mm_slli_epi16(colorsOdd, 8))
)
i += 4
values = mm_loadu_si128(data[i].addr)
alpha = mm_and_si128(values, alphaMask)
eq = mm_cmpeq_epi8(values, alphaMask)
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
let
evenMultiplier = mm_or_si128(alpha, mm_srli_epi32(alpha, 16))
oddMultiplier = mm_or_si128(evenMultiplier, alphaMask)
var
colorsEven = mm_slli_epi16(values, 8)
colorsOdd = mm_and_si128(values, oddMask)
colorsEven = mm_mulhi_epu16(colorsEven, evenMultiplier)
colorsOdd = mm_mulhi_epu16(colorsOdd, oddMultiplier)
colorsEven = mm_srli_epi16(mm_mulhi_epu16(colorsEven, div255), 7)
colorsOdd = mm_srli_epi16(mm_mulhi_epu16(colorsOdd, div255), 7)
mm_storeu_si128(
data[i].addr,
mm_or_si128(colorsEven, mm_slli_epi16(colorsOdd, 8))
)
i += 4

# Convert whatever is left
for j in i ..< data.len:
var c = data[j]
for i in i ..< data.len:
var c = data[i]
if c.a != 255:
c.r = ((c.r.uint32 * c.a.uint32) div 255).uint8
c.g = ((c.g.uint32 * c.a.uint32) div 255).uint8
c.b = ((c.b.uint32 * c.a.uint32) div 255).uint8
data[j] = c
c.r = ((c.r.uint32 * c.a) div 255).uint8
c.g = ((c.g.uint32 * c.a) div 255).uint8
c.b = ((c.b.uint32 * c.a) div 255).uint8
data[i] = c

proc isOpaque*(data: var seq[ColorRGBX], start, len: int): bool =
when defined(amd64) and allowSimd:
if cpuHasAvx2 and len >= 64:
return isOpaqueAvx2(data, start, len)

result = true

var i = start
when defined(amd64) and allowSimd:
let vec255 = mm_set1_epi32(cast[int32](uint32.high))
for _ in start ..< (start + len) div 16:
# Align to 16 bytes
var p = cast[uint](data[i].addr)
while i < (start + len) and (p and 15) != 0:
if data[i].a != 255:
return false
inc i
p += 4

let
vec255 = mm_set1_epi8(255)
iterations = (start + len - i) div 16
for _ in 0 ..< iterations:
let
values0 = mm_loadu_si128(data[i + 0].addr)
values1 = mm_loadu_si128(data[i + 4].addr)
values2 = mm_loadu_si128(data[i + 8].addr)
values3 = mm_loadu_si128(data[i + 12].addr)
values0 = mm_load_si128(cast[pointer](p))
values1 = mm_load_si128(cast[pointer](p + 16))
values2 = mm_load_si128(cast[pointer](p + 32))
values3 = mm_load_si128(cast[pointer](p + 48))
values01 = mm_and_si128(values0, values1)
values23 = mm_and_si128(values2, values3)
values = mm_and_si128(values01, values23)
eq = mm_cmpeq_epi8(values, vec255)
values0123 = mm_and_si128(values01, values23)
eq = mm_cmpeq_epi8(values0123, vec255)
if (mm_movemask_epi8(eq) and 0x00008888) != 0x00008888:
return false
i += 16
p += 64
i += 16 * iterations

for j in i ..< start + len:
if data[j].a != 255:
for i in i ..< start + len:
if data[i].a != 255:
return false

when defined(amd64) and allowSimd:
proc applyOpacity*(color: M128, opacity: float32): ColorRGBX {.inline.} =
let opacityVec = mm_set1_ps(opacity)
var finalColor = mm_cvtps_epi32(mm_mul_ps(color, opacityVec))
let opacityVec = mm_set1_ps(opacity)
var finalColor = mm_cvtps_epi32(mm_mul_ps(color, opacityVec))
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
finalColor = mm_packus_epi16(finalColor, mm_setzero_si128())
cast[ColorRGBX](mm_cvtsi128_si32(finalColor))
Expand Down
Loading