From 397f09a16e08509feabdd65b6efd227c881efcd7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 11 Mar 2026 11:16:36 -0600 Subject: [PATCH] fix: resolve Miri UB in null struct field test, re-enable Miri on PRs Add bounds-checking debug_assert in SparkUnsafeRow::get_element_offset to catch out-of-bounds accesses early. Fix test_append_null_struct_field_to_struct_builder which had an undersized 8-byte buffer (only null bitset, no field slot) with null bit unset, causing an out-of-bounds read in get_long. Use 16 bytes with bit 0 set to properly represent a null field. Re-enable Miri on pull_request trigger now that the upstream cargo nightly regression (#3499) is resolved. --- .github/workflows/miri.yml | 18 ++++++++---------- .../src/execution/shuffle/spark_unsafe/row.rs | 16 +++++++++++++--- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml index c9ee6abdd9..ea36e1359a 100644 --- a/.github/workflows/miri.yml +++ b/.github/workflows/miri.yml @@ -28,16 +28,14 @@ on: - "native/core/benches/**" - "native/spark-expr/benches/**" - "spark/src/test/scala/org/apache/spark/sql/benchmark/**" -# Disabled until Miri compatibility is restored -# https://github.com/apache/datafusion-comet/issues/3499 -# pull_request: -# paths-ignore: -# - "doc/**" -# - "docs/**" -# - "**.md" -# - "native/core/benches/**" -# - "native/spark-expr/benches/**" -# - "spark/src/test/scala/org/apache/spark/sql/benchmark/**" + pull_request: + paths-ignore: + - "doc/**" + - "docs/**" + - "**.md" + - "native/core/benches/**" + - "native/spark-expr/benches/**" + - "spark/src/test/scala/org/apache/spark/sql/benchmark/**" # manual trigger # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow workflow_dispatch: diff --git a/native/core/src/execution/shuffle/spark_unsafe/row.rs b/native/core/src/execution/shuffle/spark_unsafe/row.rs index 6b41afae8d..4d937db76f 100644 --- a/native/core/src/execution/shuffle/spark_unsafe/row.rs +++ b/native/core/src/execution/shuffle/spark_unsafe/row.rs @@ -255,8 +255,15 @@ impl SparkUnsafeObject for SparkUnsafeRow { self.row_addr } - fn get_element_offset(&self, index: usize, _: usize) -> *const u8 { - (self.row_addr + self.row_bitset_width + (index * 8) as i64) as *const u8 + fn get_element_offset(&self, index: usize, element_size: usize) -> *const u8 { + let offset = self.row_bitset_width + (index * 8) as i64; + debug_assert!( + self.row_size >= 0 && offset + element_size as i64 <= self.row_size as i64, + "get_element_offset: access at offset {offset} with size {element_size} \ + exceeds row_size {} for index {index}", + self.row_size + ); + (self.row_addr + offset) as *const u8 } } @@ -1659,7 +1666,10 @@ mod test { let fields = Fields::from(vec![Field::new("st", data_type.clone(), true)]); let mut struct_builder = StructBuilder::from_fields(fields, 1); let mut row = SparkUnsafeRow::new_with_num_fields(1); - let data = [0; 8]; + // 8 bytes null bitset + 8 bytes field value = 16 bytes + // Set bit 0 in the null bitset to mark field 0 as null + let mut data = [0u8; 16]; + data[0] = 1; row.point_to_slice(&data); append_field(&data_type, &mut struct_builder, &row, 0).expect("append field"); struct_builder.append_null();