From 4898afaf52e559a0a6edb7e36f48c6b18134705d Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 11 Aug 2025 11:35:09 -0600 Subject: [PATCH 01/10] Avoid double free in CometUnifiedShuffleMemoryAllocator --- .../shuffle/comet/CometUnifiedShuffleMemoryAllocator.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java b/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java index 917d96f0f2..193cda99e5 100644 --- a/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java +++ b/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java @@ -58,6 +58,10 @@ public synchronized MemoryBlock allocate(long required) { } public synchronized void free(MemoryBlock block) { + if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) { + // Already freed block + return; + } this.freePage(block); } From 4e607fdacd7ed8b570e55fa03daffde64e8254a7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 11 Aug 2025 11:37:02 -0600 Subject: [PATCH 02/10] Avoid double free in CometUnifiedShuffleMemoryAllocator --- .../spark/sql/comet/execution/shuffle/SpillWriter.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java index c8f845c5d2..ae1aa00608 100644 --- a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java +++ b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java @@ -218,8 +218,10 @@ protected long doSpilling( public long freeMemory() { long freed = 0L; for (MemoryBlock block : allocatedPages) { - freed += block.size(); - allocator.free(block); + if (block.pageNumber != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) { + freed += block.size(); + allocator.free(block); + } } allocatedPages.clear(); currentPage = null; From d0c0377564667ba7d8eb0d93a5a53521e68f0542 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 11 Aug 2025 11:55:34 -0600 Subject: [PATCH 03/10] check TMM --- .../shuffle/comet/CometUnifiedShuffleMemoryAllocator.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java b/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java index 193cda99e5..74a823dc17 100644 --- a/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java +++ b/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java @@ -58,7 +58,8 @@ public synchronized MemoryBlock allocate(long required) { } public synchronized void free(MemoryBlock block) { - if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) { + if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER || + block.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) { // Already freed block return; } From 09c3b7f15f29ff4daa940cf88f5ddefe2eaaa824 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 11 Aug 2025 11:57:11 -0600 Subject: [PATCH 04/10] improve checks --- .../shuffle/comet/CometBoundedShuffleMemoryAllocator.java | 3 ++- .../apache/spark/sql/comet/execution/shuffle/SpillWriter.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java b/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java index 54e9dc6848..b056ebe678 100644 --- a/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java +++ b/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java @@ -147,7 +147,8 @@ private synchronized MemoryBlock allocateMemoryBlock(long required) { } public synchronized void free(MemoryBlock block) { - if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) { + if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER || + block.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) { // Already freed block return; } diff --git a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java index ae1aa00608..92d063536f 100644 --- a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java +++ b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java @@ -218,7 +218,8 @@ protected long doSpilling( public long freeMemory() { long freed = 0L; for (MemoryBlock block : allocatedPages) { - if (block.pageNumber != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) { + if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER || + block.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) { freed += block.size(); allocator.free(block); } From 6e4282d5f14770eedae4a583feffb30959d0b7c0 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 11 Aug 2025 12:00:13 -0600 Subject: [PATCH 05/10] format --- .../shuffle/comet/CometBoundedShuffleMemoryAllocator.java | 4 ++-- .../shuffle/comet/CometUnifiedShuffleMemoryAllocator.java | 4 ++-- .../apache/spark/sql/comet/execution/shuffle/SpillWriter.java | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java b/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java index b056ebe678..ef1c6ff6f9 100644 --- a/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java +++ b/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java @@ -147,8 +147,8 @@ private synchronized MemoryBlock allocateMemoryBlock(long required) { } public synchronized void free(MemoryBlock block) { - if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER || - block.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) { + if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER + || block.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) { // Already freed block return; } diff --git a/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java b/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java index 74a823dc17..42542a41d8 100644 --- a/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java +++ b/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java @@ -58,8 +58,8 @@ public synchronized MemoryBlock allocate(long required) { } public synchronized void free(MemoryBlock block) { - if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER || - block.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) { + if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER + || block.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) { // Already freed block return; } diff --git a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java index 92d063536f..0093f44e4f 100644 --- a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java +++ b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java @@ -218,8 +218,8 @@ protected long doSpilling( public long freeMemory() { long freed = 0L; for (MemoryBlock block : allocatedPages) { - if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER || - block.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) { + if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER + || block.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) { freed += block.size(); allocator.free(block); } From ec69f687b62c97ff3ebcba9472942ae6b24a3f18 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 11 Aug 2025 12:01:04 -0600 Subject: [PATCH 06/10] fix --- .../apache/spark/sql/comet/execution/shuffle/SpillWriter.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java index 0093f44e4f..fa16c53ec5 100644 --- a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java +++ b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java @@ -220,6 +220,8 @@ public long freeMemory() { for (MemoryBlock block : allocatedPages) { if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER || block.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) { + // Already freed block + } else { freed += block.size(); allocator.free(block); } From 135c98652ed58dc27df0638ecf0455ab3c6792b6 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 11 Aug 2025 14:35:40 -0600 Subject: [PATCH 07/10] address feedback --- native/Cargo.toml | 16 ++++++++++++++++ .../CometBoundedShuffleMemoryAllocator.java | 5 +++-- .../comet/CometShuffleMemoryAllocatorTrait.java | 2 +- .../CometUnifiedShuffleMemoryAllocator.java | 6 ++++-- .../sql/comet/execution/shuffle/SpillWriter.java | 8 +------- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/native/Cargo.toml b/native/Cargo.toml index 5f0a2388fe..03fcc332da 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -60,3 +60,19 @@ overflow-checks = false lto = "thin" codegen-units = 1 strip = "debuginfo" + +# Cargo.toml +[profile.dev] +# Light optimization for your crate (keeps compile times reasonable) +opt-level = 1 # 0..3; try 1 or 2 for more speed +debug = 1 # smaller debug info; 0 = none, 2 = full +debug-assertions = false # turn off extra checks in std & your code +overflow-checks = false # disable integer overflow checks +incremental = true # faster rebuilds (slightly slower runtime) +codegen-units = 16 # more parallel codegen, faster builds +lto = "off" # keep LTO off in dev +panic = "abort" # faster, smaller binaries (no unwinding) + +# Heavier optimization for dependencies only (big win with small compile cost) +[profile.dev.package."*"] +opt-level = 3 \ No newline at end of file diff --git a/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java b/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java index ef1c6ff6f9..7ce72d4532 100644 --- a/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java +++ b/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java @@ -146,11 +146,11 @@ private synchronized MemoryBlock allocateMemoryBlock(long required) { return block; } - public synchronized void free(MemoryBlock block) { + public synchronized long free(MemoryBlock block) { if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER || block.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) { // Already freed block - return; + return 0; } allocatedMemory -= block.size(); @@ -159,6 +159,7 @@ public synchronized void free(MemoryBlock block) { block.pageNumber = MemoryBlock.FREED_IN_TMM_PAGE_NUMBER; allocator.free(block); + return allocatedMemory; } /** diff --git a/spark/src/main/java/org/apache/spark/shuffle/comet/CometShuffleMemoryAllocatorTrait.java b/spark/src/main/java/org/apache/spark/shuffle/comet/CometShuffleMemoryAllocatorTrait.java index 6831396b3a..36fa9d2ff4 100644 --- a/spark/src/main/java/org/apache/spark/shuffle/comet/CometShuffleMemoryAllocatorTrait.java +++ b/spark/src/main/java/org/apache/spark/shuffle/comet/CometShuffleMemoryAllocatorTrait.java @@ -33,7 +33,7 @@ protected CometShuffleMemoryAllocatorTrait( public abstract MemoryBlock allocate(long required); - public abstract void free(MemoryBlock block); + public abstract long free(MemoryBlock block); public abstract long getOffsetInPage(long pagePlusOffsetAddress); diff --git a/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java b/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java index 42542a41d8..439505b4dc 100644 --- a/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java +++ b/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java @@ -57,13 +57,15 @@ public synchronized MemoryBlock allocate(long required) { return this.allocatePage(required); } - public synchronized void free(MemoryBlock block) { + public synchronized long free(MemoryBlock block) { if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER || block.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) { // Already freed block - return; + return 0; } + long allocatedMemory = block.size(); this.freePage(block); + return allocatedMemory; } /** diff --git a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java index fa16c53ec5..044c7842f0 100644 --- a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java +++ b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java @@ -218,13 +218,7 @@ protected long doSpilling( public long freeMemory() { long freed = 0L; for (MemoryBlock block : allocatedPages) { - if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER - || block.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER) { - // Already freed block - } else { - freed += block.size(); - allocator.free(block); - } + freed += allocator.free(block); } allocatedPages.clear(); currentPage = null; From 64f92464ec9e21eedf66b2c24d8a490963d7f99e Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 11 Aug 2025 14:36:09 -0600 Subject: [PATCH 08/10] address feedback --- .../shuffle/comet/CometBoundedShuffleMemoryAllocator.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java b/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java index 7ce72d4532..dae55c04bf 100644 --- a/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java +++ b/spark/src/main/java/org/apache/spark/shuffle/comet/CometBoundedShuffleMemoryAllocator.java @@ -152,14 +152,15 @@ public synchronized long free(MemoryBlock block) { // Already freed block return 0; } - allocatedMemory -= block.size(); + long blockSize = block.size(); + allocatedMemory -= blockSize; pageTable[block.pageNumber] = null; allocatedPages.clear(block.pageNumber); block.pageNumber = MemoryBlock.FREED_IN_TMM_PAGE_NUMBER; allocator.free(block); - return allocatedMemory; + return blockSize; } /** From d2b3acaa8fd15d7176f78927312c9cc7df417223 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 11 Aug 2025 14:36:31 -0600 Subject: [PATCH 09/10] address feedback --- .../shuffle/comet/CometUnifiedShuffleMemoryAllocator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java b/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java index 439505b4dc..aa8de6f17f 100644 --- a/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java +++ b/spark/src/main/java/org/apache/spark/shuffle/comet/CometUnifiedShuffleMemoryAllocator.java @@ -63,9 +63,9 @@ public synchronized long free(MemoryBlock block) { // Already freed block return 0; } - long allocatedMemory = block.size(); + long blockSize = block.size(); this.freePage(block); - return allocatedMemory; + return blockSize; } /** From b4a82a0efa77f99afb9b3b38b5d18d71ceb01988 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 11 Aug 2025 14:37:09 -0600 Subject: [PATCH 10/10] revert --- native/Cargo.toml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/native/Cargo.toml b/native/Cargo.toml index 03fcc332da..5f0a2388fe 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -60,19 +60,3 @@ overflow-checks = false lto = "thin" codegen-units = 1 strip = "debuginfo" - -# Cargo.toml -[profile.dev] -# Light optimization for your crate (keeps compile times reasonable) -opt-level = 1 # 0..3; try 1 or 2 for more speed -debug = 1 # smaller debug info; 0 = none, 2 = full -debug-assertions = false # turn off extra checks in std & your code -overflow-checks = false # disable integer overflow checks -incremental = true # faster rebuilds (slightly slower runtime) -codegen-units = 16 # more parallel codegen, faster builds -lto = "off" # keep LTO off in dev -panic = "abort" # faster, smaller binaries (no unwinding) - -# Heavier optimization for dependencies only (big win with small compile cost) -[profile.dev.package."*"] -opt-level = 3 \ No newline at end of file