Skip to content
Closed
19,857 changes: 19,857 additions & 0 deletions include/vk_mem_alloc.h

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
project('grvk', 'c', version : '0.5.0', meson_version : '>= 0.46')
project('grvk', ['c', 'cpp'], version : '0.5.0', meson_version : '>= 0.46')

cpu_family = target_machine.cpu_family()

Expand Down
87 changes: 80 additions & 7 deletions src/mantle/mantle_cmd_buf.c
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,19 @@ static void grCmdBufferUpdateResources(
bindPoint->dirtyFlags = 0;
}

// Appends `image` to the command buffer's list of tracked linear images
// (`linearImages` / `linearImageCount` / `linearImageCapacity`).
//
// The image is ignored unless both its `image` and `buffer` handles are
// non-null. No de-duplication is performed, so the same image may appear in
// the list more than once. The list grows by 8 entries whenever it is full.
//
// If the list cannot be grown (allocation failure), the image is left
// untracked and the existing list, count and capacity are kept unchanged.
void grCmdTrackImage(GrCmdBuffer* grCmdBuffer, GrImage* image) {
    if (image->image == VK_NULL_HANDLE || image->buffer == VK_NULL_HANDLE) {
        return;
    }

    if (grCmdBuffer->linearImageCapacity == grCmdBuffer->linearImageCount) {
        // Grow through a temporary so a failed realloc neither leaks the old
        // array nor leaves the capacity out of sync with the real storage.
        GrImage** grown = realloc(grCmdBuffer->linearImages,
                                  sizeof(*grown) * (grCmdBuffer->linearImageCapacity + 8));
        if (grown == NULL) {
            return;
        }
        grCmdBuffer->linearImages = grown;
        grCmdBuffer->linearImageCapacity += 8;
    }

    grCmdBuffer->linearImages[grCmdBuffer->linearImageCount] = image;
    grCmdBuffer->linearImageCount++;
}

// Command Buffer Building Functions

GR_VOID GR_STDCALL grCmdBindPipeline(
Expand Down Expand Up @@ -491,6 +504,15 @@ GR_VOID GR_STDCALL grCmdBindDescriptorSet(
bindPoint->grDescriptorSet = grDescriptorSet;
bindPoint->slotOffset = slotOffset;
bindPoint->dirtyFlags |= FLAG_DIRTY_DESCRIPTOR_SETS;

if (grDescriptorSet->linearImageCount != 0) {
if (grCmdBuffer->linearImageCapacity < grCmdBuffer->linearImageCount + grDescriptorSet->linearImageCount) {
grCmdBuffer->linearImageCapacity = max(grCmdBuffer->linearImageCount + grDescriptorSet->linearImageCount, grCmdBuffer->linearImageCapacity + 8);
grCmdBuffer->linearImages = realloc(grCmdBuffer->linearImages, grCmdBuffer->linearImageCapacity * sizeof(GrImage*));
}
memcpy(grCmdBuffer->linearImages + grCmdBuffer->linearImageCount, grDescriptorSet->linearImages, grDescriptorSet->linearImageCount * sizeof(GrImage*));
grCmdBuffer->linearImageCount += grDescriptorSet->linearImageCount;
}
}

GR_VOID GR_STDCALL grCmdBindDynamicMemoryView(
Expand Down Expand Up @@ -543,6 +565,8 @@ GR_VOID GR_STDCALL grCmdBindIndexData(
const GrDevice* grDevice = GET_OBJ_DEVICE(grCmdBuffer);
GrGpuMemory* grGpuMemory = (GrGpuMemory*)mem;

grGpuMemoryBindBuffer(grGpuMemory);

VKD.vkCmdBindIndexBuffer(grCmdBuffer->commandBuffer, grGpuMemory->buffer, offset,
getVkIndexType(indexType));
}
Expand All @@ -566,6 +590,8 @@ GR_VOID GR_STDCALL grCmdPrepareMemoryRegions(
const GR_MEMORY_STATE_TRANSITION* stateTransition = &pStateTransitions[i];
GrGpuMemory* grGpuMemory = (GrGpuMemory*)stateTransition->mem;

grGpuMemoryBindBuffer(grGpuMemory);

barriers[i] = (VkBufferMemoryBarrier) {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = NULL,
Expand All @@ -575,7 +601,7 @@ GR_VOID GR_STDCALL grCmdPrepareMemoryRegions(
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = grGpuMemory->buffer,
.offset = stateTransition->offset,
.size = stateTransition->regionSize > 0 ? stateTransition->regionSize : VK_WHOLE_SIZE,
.size = stateTransition->regionSize > 0 ? min(grGpuMemory->deviceSize - stateTransition->offset, stateTransition->regionSize) : VK_WHOLE_SIZE,
};

srcStageMask |= getVkPipelineStageFlagsMemory(stateTransition->oldState);
Expand Down Expand Up @@ -667,6 +693,15 @@ GR_VOID GR_STDCALL grCmdBindTargets(
minExtent.depth = MIN(minExtent.depth, grDepthStencilView->extent.depth);
}

for (unsigned i = 0; i < colorTargetCount; i++) {
const GrColorTargetView* grColorTargetView = (GrColorTargetView*)pColorTargets[i].view;

if (grColorTargetView != NULL) {
GrImage* grImage = (GrImage*)grColorTargetView->image;
grCmdTrackImage(grCmdBuffer, grImage);
}
}

if (colorAttachmentCount != grCmdBuffer->colorAttachmentCount ||
memcmp(colorAttachments, grCmdBuffer->colorAttachments, colorAttachmentCount * sizeof(colorAttachments[0])) ||
hasDepthStencil != grCmdBuffer->hasDepthStencil ||
Expand Down Expand Up @@ -709,7 +744,11 @@ GR_VOID GR_STDCALL grCmdPrepareImages(

grCmdBufferEndRenderPass(grCmdBuffer);

unsigned bufferBarrierCount = 0;
unsigned imageBarrierCount = 0;

STACK_ARRAY(VkImageMemoryBarrier, barriers, 128, transitionCount);
STACK_ARRAY(VkBufferMemoryBarrier, bufferBarriers, 128, transitionCount);
VkPipelineStageFlags srcStageMask = 0;
VkPipelineStageFlags dstStageMask = 0;

Expand All @@ -718,27 +757,47 @@ GR_VOID GR_STDCALL grCmdPrepareImages(
GrImage* grImage = (GrImage*)stateTransition->image;
bool isDepthStencil = isVkFormatDepthStencil(grImage->format);

barriers[i] = (VkImageMemoryBarrier) {
if (grImage->buffer != VK_NULL_HANDLE) {
bufferBarriers[bufferBarrierCount] = (VkBufferMemoryBarrier) {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = NULL,
.srcAccessMask = getVkAccessFlagsImage(stateTransition->oldState, false),
.dstAccessMask = getVkAccessFlagsImage(stateTransition->newState, false),
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = grImage->buffer,
.offset = 0,
.size = VK_WHOLE_SIZE,
};
bufferBarrierCount++;

if (grImage->image == VK_NULL_HANDLE) {
continue;
}
}

barriers[imageBarrierCount] = (VkImageMemoryBarrier) {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = NULL,
.srcAccessMask = getVkAccessFlagsImage(stateTransition->oldState, isDepthStencil),
.dstAccessMask = getVkAccessFlagsImage(stateTransition->newState, isDepthStencil),
.oldLayout = getVkImageLayout(stateTransition->oldState, isDepthStencil),
.newLayout = getVkImageLayout(stateTransition->newState, isDepthStencil),
.oldLayout = grImage->buffer == VK_NULL_HANDLE ? getVkImageLayout(stateTransition->oldState, isDepthStencil) : VK_IMAGE_LAYOUT_GENERAL,
.newLayout = grImage->buffer == VK_NULL_HANDLE ? getVkImageLayout(stateTransition->newState, isDepthStencil) : VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = grImage->image,
.subresourceRange = getVkImageSubresourceRange(stateTransition->subresourceRange,
grImage->multiplyCubeLayers),
grImage->multiplyCubeLayers),
};

srcStageMask |= getVkPipelineStageFlagsImage(stateTransition->oldState);
dstStageMask |= getVkPipelineStageFlagsImage(stateTransition->newState);
imageBarrierCount++;
}

VKD.vkCmdPipelineBarrier(grCmdBuffer->commandBuffer, srcStageMask, dstStageMask,
0, 0, NULL, 0, NULL, transitionCount, barriers);

0, 0, NULL, bufferBarrierCount, bufferBarriers, imageBarrierCount, barriers);
STACK_ARRAY_FINISH(bufferBarriers);
STACK_ARRAY_FINISH(barriers);
}

Expand Down Expand Up @@ -840,6 +899,7 @@ GR_VOID GR_STDCALL grCmdDispatchIndirect(

grCmdBufferUpdateResources(grCmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE);
grCmdBufferEndRenderPass(grCmdBuffer);
grGpuMemoryBindBuffer(grGpuMemory);

VKD.vkCmdDispatchIndirect(grCmdBuffer->commandBuffer, grGpuMemory->buffer, offset);
}
Expand All @@ -858,6 +918,8 @@ GR_VOID GR_STDCALL grCmdCopyMemory(
GrGpuMemory* grDstGpuMemory = (GrGpuMemory*)destMem;

grCmdBufferEndRenderPass(grCmdBuffer);
grGpuMemoryBindBuffer(grSrcGpuMemory);
grGpuMemoryBindBuffer(grDstGpuMemory);

STACK_ARRAY(VkBufferCopy, vkRegions, 128, regionCount);

Expand Down Expand Up @@ -889,6 +951,8 @@ GR_VOID GR_STDCALL grCmdCopyImage(
const GrDevice* grDevice = GET_OBJ_DEVICE(grCmdBuffer);
GrImage* grSrcImage = (GrImage*)srcImage;
GrImage* grDstImage = (GrImage*)destImage;
grCmdTrackImage(grCmdBuffer, grSrcImage);
grCmdTrackImage(grCmdBuffer, grDstImage);
unsigned srcTileSize = getVkFormatTileSize(grSrcImage->format);
unsigned dstTileSize = getVkFormatTileSize(grDstImage->format);
unsigned extentTileSize = srcTileSize > dstTileSize ? dstTileSize : srcTileSize;
Expand Down Expand Up @@ -996,13 +1060,15 @@ GR_VOID GR_STDCALL grCmdCopyMemoryToImage(
const GrDevice* grDevice = GET_OBJ_DEVICE(grCmdBuffer);
GrGpuMemory* grSrcGpuMemory = (GrGpuMemory*)srcMem;
GrImage* grDstImage = (GrImage*)destImage;
grCmdTrackImage(grCmdBuffer, grDstImage);
unsigned dstTileSize = getVkFormatTileSize(grDstImage->format);

if (quirkHas(QUIRK_COMPRESSED_IMAGE_COPY_IN_TEXELS)) {
dstTileSize = 1;
}

grCmdBufferEndRenderPass(grCmdBuffer);
grGpuMemoryBindBuffer(grSrcGpuMemory);

STACK_ARRAY(VkBufferImageCopy, vkRegions, 128, regionCount);

Expand Down Expand Up @@ -1049,13 +1115,15 @@ GR_VOID GR_STDCALL grCmdCopyImageToMemory(
const GrDevice* grDevice = GET_OBJ_DEVICE(grCmdBuffer);
GrImage* grSrcImage = (GrImage*)srcImage;
GrGpuMemory* grDstGpuMemory = (GrGpuMemory*)destMem;
grCmdTrackImage(grCmdBuffer, grSrcImage);
unsigned srcTileSize = getVkFormatTileSize(grSrcImage->format);

if (quirkHas(QUIRK_COMPRESSED_IMAGE_COPY_IN_TEXELS)) {
srcTileSize = 1;
}

grCmdBufferEndRenderPass(grCmdBuffer);
grGpuMemoryBindBuffer(grDstGpuMemory);

STACK_ARRAY(VkBufferImageCopy, vkRegions, 128, regionCount);

Expand Down Expand Up @@ -1102,6 +1170,7 @@ GR_VOID GR_STDCALL grCmdUpdateMemory(
GrGpuMemory* grDstGpuMemory = (GrGpuMemory*)destMem;

grCmdBufferEndRenderPass(grCmdBuffer);
grGpuMemoryBindBuffer(grDstGpuMemory);

VKD.vkCmdUpdateBuffer(grCmdBuffer->commandBuffer, grDstGpuMemory->buffer, destOffset,
dataSize, pData);
Expand All @@ -1120,6 +1189,7 @@ GR_VOID GR_STDCALL grCmdFillMemory(
GrGpuMemory* grDstGpuMemory = (GrGpuMemory*)destMem;

grCmdBufferEndRenderPass(grCmdBuffer);
grGpuMemoryBindBuffer(grDstGpuMemory);

VKD.vkCmdFillBuffer(grCmdBuffer->commandBuffer, grDstGpuMemory->buffer, destOffset,
fillSize, data);
Expand All @@ -1137,6 +1207,7 @@ GR_VOID GR_STDCALL grCmdClearColorImage(
GrCmdBuffer* grCmdBuffer = (GrCmdBuffer*)cmdBuffer;
const GrDevice* grDevice = GET_OBJ_DEVICE(grCmdBuffer);
GrImage* grImage = (GrImage*)image;
grCmdTrackImage(grCmdBuffer, grImage);

grCmdBufferEndRenderPass(grCmdBuffer);

Expand Down Expand Up @@ -1169,6 +1240,7 @@ GR_VOID GR_STDCALL grCmdClearColorImageRaw(
GrCmdBuffer* grCmdBuffer = (GrCmdBuffer*)cmdBuffer;
const GrDevice* grDevice = GET_OBJ_DEVICE(grCmdBuffer);
GrImage* grImage = (GrImage*)image;
grCmdTrackImage(grCmdBuffer, grImage);

grCmdBufferEndRenderPass(grCmdBuffer);

Expand Down Expand Up @@ -1201,6 +1273,7 @@ GR_VOID GR_STDCALL grCmdClearDepthStencil(
GrCmdBuffer* grCmdBuffer = (GrCmdBuffer*)cmdBuffer;
const GrDevice* grDevice = GET_OBJ_DEVICE(grCmdBuffer);
GrImage* grImage = (GrImage*)image;
grCmdTrackImage(grCmdBuffer, grImage);

grCmdBufferEndRenderPass(grCmdBuffer);

Expand Down
68 changes: 68 additions & 0 deletions src/mantle/mantle_cmd_buf_man.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ void grCmdBufferResetState(
VKD.vkResetDescriptorPool(grDevice->device, grCmdBuffer->descriptorPools[i], 0);
}

// Reset tracked linear images
grCmdBuffer->linearImageCount = 0;
free(grCmdBuffer->linearImages);
grCmdBuffer->linearImageCapacity = 8;
grCmdBuffer->linearImages = calloc(sizeof(GrImage*), grCmdBuffer->linearImageCapacity);

// Clear state
unsigned stateOffset = OFFSET_OF(GrCmdBuffer, isBuilding);
memset(&((uint8_t*)grCmdBuffer)[stateOffset], 0, sizeof(GrCmdBuffer) - stateOffset);
Expand Down Expand Up @@ -135,6 +141,9 @@ GR_RESULT GR_STDCALL grCreateCommandBuffer(
.stencilAttachment = { 0 },
.depthStencilFormat = 0,
.minExtent = { 0, 0, 0 },
.linearImages = calloc(sizeof(GrImage*), 8),
.linearImageCount = 0,
.linearImageCapacity = 8,
};

*pCmdBuffer = (GR_CMD_BUFFER)grCmdBuffer;
Expand Down Expand Up @@ -203,6 +212,65 @@ GR_RESULT GR_STDCALL grEndCommandBuffer(

grCmdBufferEndRenderPass(grCmdBuffer);

if (grCmdBuffer->linearImageCount != 0) {
VkBufferImageCopy* regions = NULL;
int regionsCapacity = 0;
for (int i = 0; i < grCmdBuffer->linearImageCount; i++) {
GrImage* image = grCmdBuffer->linearImages[i];
int imageRegionsCount = image->arrayLayers * image->mipLevels;
if (regions == NULL) {
regions = malloc(sizeof(VkBufferImageCopy) * imageRegionsCount);
regionsCapacity = imageRegionsCount;
} else if (regionsCapacity < imageRegionsCount) {
regions = realloc(regions, sizeof(VkBufferImageCopy) * imageRegionsCount);
regionsCapacity = imageRegionsCount;
}
for (int a = 0; a < image->arrayLayers; a++) {
for (int m = 0; m < image->mipLevels; m++) {
if (getVkFormatTileSize(image->format) != 1) {
LOGE("Block compressed linear images aren't supported");
continue;
}

VkImageAspectFlags aspectMask = 0;
if (isVkFormatDepth(image->format)) {
aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
}
if (isVkFormatStencil(image->format)) {
aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
if (aspectMask == 0) {
aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
}

int layerIndex = a * image->mipLevels + m;
regions[layerIndex] = (VkBufferImageCopy) {
.bufferOffset = grImageGetBufferOffset(image->extent, image->format, a, image->arrayLayers, m),
.bufferRowLength = 0,
.imageSubresource.aspectMask = aspectMask,
.imageSubresource.mipLevel = m,
.imageSubresource.baseArrayLayer = a,
.imageSubresource.layerCount = 1,
.imageOffset = (VkOffset3D) {
.x = 0,
.y = 0,
.z = 0,
},
.imageExtent = (VkExtent3D) {
.width = max(1, image->extent.width >> m),
.height = max(1, image->extent.height >> m),
.depth = max(1, image->extent.depth >> m),
},
};
}
}
VKD.vkCmdCopyImageToBuffer(grCmdBuffer->commandBuffer, image->image, VK_IMAGE_LAYOUT_GENERAL, image->buffer, imageRegionsCount, regions);
}
if (regions != NULL) {
free(regions);
}
}

VkResult res = VKD.vkEndCommandBuffer(grCmdBuffer->commandBuffer);
if (res != VK_SUCCESS) {
LOGE("vkEndCommandBuffer failed (%d)\n", res);
Expand Down
Loading