Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions ffx-parallelsort/FFX_ParallelSort.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,10 @@
void FFX_ParallelSort_ScanPrefix(uint numValuesToScan, uint localID, uint groupID, uint BinOffset, uint BaseIndex, bool AddPartialSums,
FFX_ParallelSortCB CBuffer, RWStructuredBuffer<uint> ScanSrc, RWStructuredBuffer<uint> ScanDst, RWStructuredBuffer<uint> ScanScratch)
{
+ uint i;
+
// Perform coalesced loads into LDS
- for (uint i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
+ for (i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
{
uint DataIndex = BaseIndex + (i * FFX_PARALLELSORT_THREADGROUP_SIZE) + localID;

Expand All @@ -263,7 +265,7 @@

uint threadgroupSum = 0;
// Calculate the local scan-prefix for current thread
- for (uint i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
+ for (i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
{
uint tmp = gs_FFX_PARALLELSORT_LDS[i][localID];
gs_FFX_PARALLELSORT_LDS[i][localID] = threadgroupSum;
Expand All @@ -283,14 +285,14 @@
}

// Add the block scanned-prefixes back in
- for (uint i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
+ for (i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
gs_FFX_PARALLELSORT_LDS[i][localID] += threadgroupSum;

// Wait for everyone to catch up
GroupMemoryBarrierWithGroupSync();

// Perform coalesced writes to scan dst
- for (uint i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
+ for (i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
{
uint DataIndex = BaseIndex + (i * FFX_PARALLELSORT_THREADGROUP_SIZE) + localID;

Expand Down Expand Up @@ -378,7 +380,7 @@
uint bitKey = (keyIndex >> bitShift) & 0x3;

// Create a packed histogram
- uint packedHistogram = 1 << (bitKey * 8);
+ uint packedHistogram = 1U << (bitKey * 8);

// Sum up all the packed keys (generates counted offsets up to current thread group)
uint localSum = FFX_ParallelSort_BlockScanPrefix(packedHistogram, localID);
Expand Down