Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pixie.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ requires "vmath >= 1.1.4"
requires "chroma >= 0.2.6"
requires "zippy >= 0.10.3"
requires "flatty >= 0.3.4"
requires "nimsimd >= 1.1.7"
requires "nimsimd >= 1.1.8"
requires "bumpy >= 1.1.1"

task bindings, "Generate bindings":
Expand Down
20 changes: 10 additions & 10 deletions src/pixie/images.nim
Original file line number Diff line number Diff line change
Expand Up @@ -320,38 +320,38 @@ proc magnifyBy2*(image: Image, power = 1): Image {.raises: [PixieError].} =
result.width * 4
)

proc applyOpacity*(target: Image, opacity: float32) {.hasSimd, raises: [].} =
proc applyOpacity*(image: Image, opacity: float32) {.hasSimd, raises: [].} =
## Multiplies alpha of the image by opacity.
let opacity = round(255 * opacity).uint16
if opacity == 255:
return

if opacity == 0:
target.fill(rgbx(0, 0, 0, 0))
image.fill(rgbx(0, 0, 0, 0))
return

for i in 0 ..< target.data.len:
var rgbx = target.data[i]
for i in 0 ..< image.data.len:
var rgbx = image.data[i]
rgbx.r = ((rgbx.r * opacity) div 255).uint8
rgbx.g = ((rgbx.g * opacity) div 255).uint8
rgbx.b = ((rgbx.b * opacity) div 255).uint8
rgbx.a = ((rgbx.a * opacity) div 255).uint8
target.data[i] = rgbx
image.data[i] = rgbx

proc invert*(target: Image) {.hasSimd, raises: [].} =
proc invert*(image: Image) {.hasSimd, raises: [].} =
## Inverts all of the colors and alpha.
for i in 0 ..< target.data.len:
var rgbx = target.data[i]
for i in 0 ..< image.data.len:
var rgbx = image.data[i]
rgbx.r = 255 - rgbx.r
rgbx.g = 255 - rgbx.g
rgbx.b = 255 - rgbx.b
rgbx.a = 255 - rgbx.a
target.data[i] = rgbx
image.data[i] = rgbx

# Inverting rgbx(50, 100, 150, 200) becomes rgbx(205, 155, 105, 55). This
# is not a valid premultiplied alpha color.
# We need to convert back to premultiplied alpha after inverting.
target.data.toPremultipliedAlpha()
image.data.toPremultipliedAlpha()

proc blur*(
image: Image, radius: float32, outOfBounds: SomeColor = color(0, 0, 0, 0)
Expand Down
14 changes: 7 additions & 7 deletions src/pixie/masks.nim
Original file line number Diff line number Diff line change
Expand Up @@ -165,18 +165,18 @@ proc magnifyBy2*(mask: Mask, power = 1): Mask {.raises: [PixieError].} =
result.width
)

proc applyOpacity*(target: Mask, opacity: float32) {.hasSimd, raises: [].} =
proc applyOpacity*(mask: Mask, opacity: float32) {.hasSimd, raises: [].} =
## Multiplies the values of the mask by opacity.
let opacity = round(255 * opacity).uint16
if opacity == 255:
return

if opacity == 0:
target.fill(0)
mask.fill(0)
return

for i in 0 ..< target.data.len:
target.data[i] = ((target.data[i] * opacity) div 255).uint8
for i in 0 ..< mask.data.len:
mask.data[i] = ((mask.data[i] * opacity) div 255).uint8

proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
## Gets an interpolated value at floating point coordinates.
Expand Down Expand Up @@ -206,10 +206,10 @@ proc getValueSmooth*(mask: Mask, x, y: float32): uint8 {.raises: [].} =
else:
topMix

proc invert*(target: Mask) {.hasSimd, raises: [].} =
proc invert*(mask: Mask) {.hasSimd, raises: [].} =
## Inverts all of the values - creates a negative of the mask.
for i in 0 ..< target.data.len:
target.data[i] = 255 - target.data[i]
for i in 0 ..< mask.data.len:
mask.data[i] = 255 - mask.data[i]

proc spread*(mask: Mask, spread: float32) {.raises: [PixieError].} =
## Grows the mask by spread.
Expand Down
5 changes: 3 additions & 2 deletions src/pixie/simd.nim
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import simd/internal
import simd/internal, system/memory

export internal
export internal, memory

const allowSimd* = not defined(pixieNoSimd) and not defined(tcc)

Expand All @@ -20,6 +20,7 @@ when allowSimd:

elif defined(arm64):
import simd/neon
export neon

import nimsimd/neon as nimsimdneon
export nimsimdneon
85 changes: 84 additions & 1 deletion src/pixie/simd/avx2.nim
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import chroma, internal, nimsimd/avx2, pixie/common
import avx, chroma, internal, nimsimd/avx2, pixie/common, vmath

when defined(gcc) or defined(clang):
{.localPassc: "-mavx2".}
Expand Down Expand Up @@ -133,5 +133,88 @@ proc toPremultipliedAlphaAvx2*(data: var seq[ColorRGBA | ColorRGBX]) {.simd.} =
c.b = ((c.b.uint32 * c.a + 127) div 255).uint8
data[i] = c

proc invertAvx2*(image: Image) {.simd.} =
  ## Inverts every channel (including alpha) of every pixel in the image in
  ## place using AVX2, then converts the data back to premultiplied alpha.
  if image.data.len == 0:
    # Nothing to invert and there is no first element to take the address of.
    return

  var
    i: int
    p = cast[uint](image.data[0].addr)
  # Invert pixels one at a time until p sits on a 32 byte boundary so the
  # aligned loads and stores below are valid.
  while i < image.data.len and (p and 31) != 0:
    var rgbx = image.data[i]
    rgbx.r = 255 - rgbx.r
    rgbx.g = 255 - rgbx.g
    rgbx.b = 255 - rgbx.b
    rgbx.a = 255 - rgbx.a
    image.data[i] = rgbx
    inc i
    p += 4

  let
    vec255 = mm256_set1_epi8(255)
    # Only the pixels left after the alignment loop may be handled here.
    # Counting from the full length would read and write past the end of the
    # buffer whenever the alignment loop consumed any pixels.
    iterations = (image.data.len - i) div 16
  # Each iteration inverts 64 bytes (16 pixels) with two 32 byte vectors.
  for _ in 0 ..< iterations:
    let
      a = mm256_load_si256(cast[pointer](p))
      b = mm256_load_si256(cast[pointer](p + 32))
    mm256_store_si256(cast[pointer](p), mm256_sub_epi8(vec255, a))
    mm256_store_si256(cast[pointer](p + 32), mm256_sub_epi8(vec255, b))
    p += 64
  i += 16 * iterations

  # Finish whatever did not fill a whole 16 pixel iteration.
  for i in i ..< image.data.len:
    var rgbx = image.data[i]
    rgbx.r = 255 - rgbx.r
    rgbx.g = 255 - rgbx.g
    rgbx.b = 255 - rgbx.b
    rgbx.a = 255 - rgbx.a
    image.data[i] = rgbx

  # Inverting each channel independently does not keep the color channels
  # scaled by alpha, so convert back to premultiplied alpha afterwards.
  toPremultipliedAlphaAvx2(image.data)

proc applyOpacityAvx2*(image: Image, opacity: float32) {.simd.} =
  ## Multiplies every channel of every pixel in the image by opacity, in
  ## place, using AVX2. The opacity is scaled by 255 and rounded first; a
  ## result of 255 leaves the image untouched and a result of 0 fills the
  ## image with rgbx(0, 0, 0, 0).
  let opacity = round(255 * opacity).uint16
  if opacity == 255:
    return

  if opacity == 0:
    fillUnsafeAvx(image.data, rgbx(0, 0, 0, 0), 0, image.data.len)
    return

  if image.data.len == 0:
    # No pixels to scale and no first element to take the address of.
    return

  var
    i: int
    p = cast[uint](image.data[0].addr)

  let
    oddMask = mm256_set1_epi16(0xff00)
    # mulhi by 0x8081 followed by a shift right by 7 matches the scalar
    # `div 255` used in the tail loop below.
    div255 = mm256_set1_epi16(0x8081)
    zeroVec = mm256_setzero_si256()
    # Opacity sits in the high byte so that mulhi of two high-byte values
    # yields the plain channel * opacity product.
    opacityVec = mm256_slli_epi16(mm256_set1_epi16(opacity), 8)
    iterations = image.data.len div 8
  # Each iteration covers 32 bytes (8 pixels) using unaligned loads/stores.
  for _ in 0 ..< iterations:
    let
      values = mm256_loadu_si256(cast[pointer](p))
      eqZero = mm256_cmpeq_epi16(values, zeroVec)
    # Skip the math and the store when all 8 pixels are entirely zero.
    if mm256_movemask_epi8(eqZero) != cast[int32](0xffffffff):
      # Every 16 bit lane holds two channels: the low byte is moved into the
      # high byte (even) and the high byte is masked in place (odd).
      var
        valuesEven = mm256_slli_epi16(values, 8)
        valuesOdd = mm256_and_si256(values, oddMask)
      valuesEven = mm256_mulhi_epu16(valuesEven, opacityVec)
      valuesOdd = mm256_mulhi_epu16(valuesOdd, opacityVec)
      valuesEven = mm256_srli_epi16(mm256_mulhi_epu16(valuesEven, div255), 7)
      valuesOdd = mm256_srli_epi16(mm256_mulhi_epu16(valuesOdd, div255), 7)
      # Recombine the two results into their original byte positions.
      mm256_storeu_si256(
        cast[pointer](p),
        mm256_or_si256(valuesEven, mm256_slli_epi16(valuesOdd, 8))
      )
    p += 32
  i += 8 * iterations

  # Finish whatever did not fill a whole 8 pixel iteration.
  for i in i ..< image.data.len:
    var rgbx = image.data[i]
    rgbx.r = ((rgbx.r * opacity) div 255).uint8
    rgbx.g = ((rgbx.g * opacity) div 255).uint8
    rgbx.b = ((rgbx.b * opacity) div 255).uint8
    rgbx.a = ((rgbx.a * opacity) div 255).uint8
    image.data[i] = rgbx

when defined(release):
{.pop.}
52 changes: 40 additions & 12 deletions src/pixie/simd/internal.nim
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,42 @@ import std/macros, std/tables
var simdProcs* {.compiletime.}: Table[string, NimNode]

proc procName(procedure: NimNode): string =
## Given a procedure signature returns only name string.
## Given a procedure this returns the name as a string.
let nameNode = procedure[0]
if nameNode.kind == nnkPostfix:
nameNode[1].strVal
else:
nameNode.strVal

proc procArguments(procedure: NimNode): seq[NimNode] =
## Given a procedure signature gets the arguments as a list.
## Given a procedure this gets the arguments as a list.
for i, arg in procedure[3]:
if i > 0:
for j in 0 ..< arg.len - 2:
result.add(arg[j])

proc procReturnType(procedure: NimNode): NimNode =
## Given a procedure signature gets the return type.
## Given a procedure this gets the return type.
procedure[3][0]

proc procSignature(procedure: NimNode): string =
  ## Builds a string describing the parameter types and return type of a
  ## procedure, e.g. `(Image, float32)` or `(Mask): uint8`. The type is
  ## repeated once for each name in a grouped declaration such as `x, y: T`.
  let params = procedure[3]

  result = "("

  # params[0] is the return type, the parameter definitions start at 1.
  for idx in 1 ..< params.len:
    let
      identDefs = params[idx]
      typeRepr = identDefs[^2].repr
    # The last two children are the type and the default value, everything
    # before them is a parameter name.
    for _ in 0 ..< identDefs.len - 2:
      result &= typeRepr & ", "

  if params.len > 1:
    # Drop the trailing ", " separator.
    result.setLen(result.len - 2)

  result &= ")"

  let returnType = params[0]
  if returnType.kind != nnkEmpty:
    result &= ": " & returnType.repr

proc callAndReturn(name: NimNode, procedure: NimNode): NimNode =
## Produces a procedure call with arguments.
let
Expand All @@ -38,8 +56,8 @@ proc callAndReturn(name: NimNode, procedure: NimNode): NimNode =
return `call`

macro simd*(procedure: untyped) =
let name = procedure.procName()
simdProcs[name] = procedure.copy()
let signature = procedure.procName() & procSignature(procedure)
simdProcs[signature] = procedure.copy()
return procedure

macro hasSimd*(procedure: untyped) =
Expand All @@ -53,25 +71,31 @@ macro hasSimd*(procedure: untyped) =
callAvx = callAndReturn(ident(nameAvx), procedure)
callAvx2 = callAndReturn(ident(nameAvx2), procedure)

var body = newStmtList()
var
foundSimd: bool
body = newStmtList()

when defined(amd64) and not defined(pixieNoAvx):
if nameAvx2 in simdProcs:
if nameAvx2 & procSignature(procedure) in simdProcs:
foundSimd = true
body.add quote do:
if cpuHasAvx2:
`callAvx2`

if nameAvx in simdProcs:
if nameAvx & procSignature(procedure) in simdProcs:
foundSimd = true
body.add quote do:
if cpuHasAvx2:
`callAvx`

if nameSse2 in simdProcs:
let bodySse2 = simdProcs[nameSse2][6]
if nameSse2 & procSignature(procedure) in simdProcs:
foundSimd = true
let bodySse2 = simdProcs[nameSse2 & procSignature(procedure)][6]
body.add quote do:
`bodySse2`
elif nameNeon in simdProcs:
let bodyNeon = simdProcs[nameNeon][6]
elif nameNeon & procSignature(procedure) in simdProcs:
foundSimd = true
let bodyNeon = simdProcs[nameNeon & procSignature(procedure)][6]
body.add quote do:
`bodyNeon`
else:
Expand All @@ -80,4 +104,8 @@ macro hasSimd*(procedure: untyped) =

procedure[6] = body

when not defined(pixieNoSimd):
if not foundSimd:
echo "No SIMD found for " & name & procSignature(procedure)

return procedure
Loading