From ef08017c90eaef18252cb17722e408ff450e957a Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 19 Apr 2023 10:40:03 -0400 Subject: [PATCH] [METAL] Fix flaky memory issue due to racing This PR aims to fix a relatively rare flaky race issue that occurs when we attempt to purge memory while the buffer is still in use in a command buffer --- src/runtime/metal/metal_device_api.mm | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/runtime/metal/metal_device_api.mm b/src/runtime/metal/metal_device_api.mm index 0ef07b189a6b..0f1be0cc95ea 100644 --- a/src/runtime/metal/metal_device_api.mm +++ b/src/runtime/metal/metal_device_api.mm @@ -196,6 +196,10 @@ int GetWarpSize(id dev) { void MetalWorkspace::FreeDataSpace(Device dev, void* ptr) { AUTORELEASEPOOL { + // need to make sure buffer is not in use in command buffer + // before set the purgeable state to empty + // otherwise can cause issues sometimes + this->StreamSync(dev, nullptr); // MTLBuffer PurgeableState should be set to empty before manual // release in order to prevent memory leak [(id)ptr setPurgeableState:MTLPurgeableStateEmpty]; @@ -336,6 +340,10 @@ int GetWarpSize(id dev) { if (temp_buffer_[dev.device_id] == nil || temp_buffer_[dev.device_id].length < size) { id mtl_dev = MetalWorkspace::Global()->GetDevice(dev); if (temp_buffer_[dev.device_id] != nil) { + // need to make sure buffer is not in use in command buffer + // before set the purgeable state to empty + // otherwise can cause issues sometimes + MetalWorkspace::Global()->StreamSync(dev, nullptr); [temp_buffer_[dev.device_id] setPurgeableState:MTLPurgeableStateEmpty]; [temp_buffer_[dev.device_id] release]; }