Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions src/pixie/internal.nim
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ import chroma, common, system/memory, vmath
const allowSimd* = not defined(pixieNoSimd) and not defined(tcc)

when defined(amd64) and allowSimd:
import nimsimd/sse2
import nimsimd/runtimecheck, nimsimd/sse2, simd/avx
let cpuHasAvx* = checkInstructionSets({AVX})

template currentExceptionAsPixieError*(): untyped =
## Gets the current exception and returns it as a PixieError with stack trace.
Expand Down Expand Up @@ -63,21 +64,29 @@ proc fillUnsafe*(
## Fills the image data with the color starting at index start and
## continuing for len indices.
let rgbx = color.asRgbx()

# If we can use AVX, do so
when defined(amd64) and allowSimd:
if cpuHasAvx and len >= 64:
fillUnsafeAvx(data, rgbx, start, len)
return

# Use memset when every byte has the same value
if rgbx.r == rgbx.g and rgbx.r == rgbx.b and rgbx.r == rgbx.a:
nimSetMem(data[start].addr, rgbx.r.cint, len * 4)
else:
var i = start
when defined(amd64) and allowSimd:
# Align to 16 bytes
while i < (start + len) and (cast[uint](data[i].addr) and 15) != 0:
var p = cast[uint](data[i].addr)
while i < (start + len) and (p and 15) != 0:
data[i] = rgbx
inc i
p += 4
# When supported, SIMD fill until we run out of room
let
colorVec = mm_set1_epi32(cast[int32](rgbx))
iterations = (start + len - i) div 8
var p = cast[uint](data[i].addr)
for _ in 0 ..< iterations:
mm_store_si128(cast[pointer](p), colorVec)
mm_store_si128(cast[pointer](p + 16), colorVec)
Expand All @@ -93,8 +102,8 @@ proc fillUnsafe*(
copyMem(data[i].addr, u64.addr, 8)
i += 2
# Fill whatever is left the slow way
for j in i ..< start + len:
data[j] = rgbx
for i in i ..< start + len:
data[i] = rgbx

const straightAlphaTable = block:
var table: array[256, array[256, uint8]]
Expand Down
44 changes: 31 additions & 13 deletions src/pixie/paths.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1131,25 +1131,43 @@ proc partitionSegments(
startY = top.uint32
partitionHeight = height.uint32 div numPartitions

for (segment, winding) in segments:
var entry = initPartitionEntry(segment, winding)
if partitionHeight == 0:
result[0].entries.add(move entry)
else:
var entries = newSeq[PartitionEntry](segments.len)
for i, (segment, winding) in segments:
entries[i] = initPartitionEntry(segment, winding)

if numPartitions == 1:
result[0].entries = move entries
else:
iterator partitionRange(
segment: Segment,
numPartitions, startY, partitionHeight: uint32
): uint32 =
var
atPartition = max(0, segment.at.y - startY.float32).uint32
toPartition = max(0, segment.to.y - startY.float32).uint32
atPartition = atPartition div partitionHeight
toPartition = toPartition div partitionHeight
atPartition = min(atPartition, result.high.uint32)
toPartition = min(toPartition, result.high.uint32)
for i in atPartition .. toPartition:
result[i].entries.add(entry)
atPartition = min(atPartition, numPartitions - 1)
toPartition = min(toPartition, numPartitions - 1)
for partitionIndex in atPartition .. toPartition:
yield partitionIndex

# Set the bottom values for the partitions (y value where this partition ends)
var entryCounts = newSeq[int](numPartitions)
for (segment, _) in segments:
for partitionIndex in segment.partitionRange(numPartitions, startY, partitionHeight):
inc entryCounts[partitionIndex]

var partitionBottom = top + partitionHeight.int
for partitionIndex, entryCounts in entryCounts:
result[partitionIndex].entries.setLen(entryCounts)

var indexes = newSeq[int](numPartitions)
for i, (segment, winding) in segments:
for partitionIndex in segment.partitionRange(numPartitions, startY, partitionHeight):
result[partitionIndex].entries[indexes[partitionIndex]] = entries[i]
inc indexes[partitionIndex]

# Set the bottom values for the partitions (y value where this partition ends)
var partitionBottom = top + partitionHeight.int
for partition in result.mitems:
partition.bottom = partitionBottom
partition.requiresAntiAliasing =
Expand Down Expand Up @@ -1313,7 +1331,7 @@ proc computeCoverage(
if fillLen > 0:
var i = fillStart
when defined(amd64) and allowSimd:
let sampleCoverageVec = mm_set1_epi8(cast[int8](sampleCoverage))
let sampleCoverageVec = mm_set1_epi8(sampleCoverage)
for _ in 0 ..< fillLen div 16:
var coverageVec = mm_loadu_si128(coverages[i - startX].addr)
coverageVec = mm_add_epi8(coverageVec, sampleCoverageVec)
Expand Down Expand Up @@ -1354,7 +1372,7 @@ proc fillCoverage(
let
coverageVec = mm_loadu_si128(coverages[x - startX].unsafeAddr)
eqZero = mm_cmpeq_epi8(coverageVec, mm_setzero_si128())
eq255 = mm_cmpeq_epi8(coverageVec, mm_set1_epi8(cast[int8](255)))
eq255 = mm_cmpeq_epi8(coverageVec, mm_set1_epi8(255))
allZeroes = mm_movemask_epi8(eqZero) == 0xffff
all255 = mm_movemask_epi8(eq255) == 0xffff
yield (coverageVec, allZeroes, all255)
Expand Down
35 changes: 35 additions & 0 deletions src/pixie/simd/avx.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import chroma, nimsimd/avx

# With gcc or clang, add -mavx to the C compiler flags so the AVX intrinsics
# used in this module can be compiled.
when defined(gcc) or defined(clang):
{.localPassc: "-mavx".}

# In release builds, switch runtime checks off for the code that follows
# (re-enabled by the matching pop at the end of the module).
when defined(release):
{.push checks: off.}

proc fillUnsafeAvx*(
  data: var seq[ColorRGBX],
  rgbx: ColorRGBX,
  start, len: int
) =
  ## Fills `len` entries of `data`, beginning at index `start`, with `rgbx`,
  ## using 32-byte aligned AVX stores for the bulk of the range.
  ##
  ## The caller is expected to pass a range that lies inside `data`.
  if len <= 0:
    # Nothing to fill. Returning early also avoids taking the address of
    # data[start] when start is one past the last element.
    return

  let stop = start + len
  var
    i = start
    p = cast[uint](data[i].addr)
  # Write single entries until p reaches a 32-byte boundary, as needed by
  # the aligned stores below.
  while i < stop and (p and 31) != 0:
    data[i] = rgbx
    inc i
    p += 4
  # SIMD fill until we run out of room; each 32-byte store covers 8 entries.
  let
    iterations = (stop - i) div 8
    colorVec = mm256_set1_epi32(cast[int32](rgbx))
  for _ in 0 ..< iterations:
    mm256_store_si256(cast[pointer](p), colorVec)
    p += 32
  i += iterations * 8
  # Fill whatever is left the slow way
  for j in i ..< stop:
    data[j] = rgbx

# Undo the `push checks: off` made near the top of this module.
when defined(release):
{.pop.}