7 changes: 3 additions & 4 deletions native/core/src/execution/operators/shuffle_scan.rs
@@ -18,8 +18,7 @@
 use crate::{
     errors::CometError,
     execution::{
-        operators::ExecutionError, planner::TEST_EXEC_CONTEXT_ID,
-        shuffle::codec::read_ipc_compressed,
+        operators::ExecutionError, planner::TEST_EXEC_CONTEXT_ID, shuffle::ipc::read_ipc_compressed,
     },
     jvm_bridge::{jni_call, JVMClasses},
 };
@@ -352,15 +351,15 @@ impl RecordBatchStream for ShuffleScanStream

 #[cfg(test)]
 mod tests {
-    use crate::execution::shuffle::codec::{CompressionCodec, ShuffleBlockWriter};
+    use crate::execution::shuffle::{CompressionCodec, ShuffleBlockWriter};
     use arrow::array::{Int32Array, StringArray};
     use arrow::datatypes::{DataType, Field, Schema};
     use arrow::record_batch::RecordBatch;
     use datafusion::physical_plan::metrics::Time;
     use std::io::Cursor;
     use std::sync::Arc;

-    use crate::execution::shuffle::codec::read_ipc_compressed;
+    use crate::execution::shuffle::ipc::read_ipc_compressed;

     #[test]
     #[cfg_attr(miri, ignore)] // Miri cannot call FFI functions (zstd)
5 changes: 2 additions & 3 deletions native/shuffle/benches/row_columnar.rs
@@ -23,9 +23,8 @@

 use arrow::datatypes::{DataType as ArrowDataType, Field, Fields};
 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
-use datafusion_comet_shuffle::spark_unsafe::row::{
-    process_sorted_row_partition, SparkUnsafeObject, SparkUnsafeRow,
-};
+use datafusion_comet_shuffle::spark_unsafe::row::{process_sorted_row_partition, SparkUnsafeRow};
+use datafusion_comet_shuffle::spark_unsafe::unsafe_object::SparkUnsafeObject;
 use datafusion_comet_shuffle::CompressionCodec;
 use std::sync::Arc;
 use tempfile::Builder;
1 change: 1 addition & 0 deletions native/shuffle/src/comet_partitioning.rs
@@ -19,6 +19,7 @@ use arrow::row::{OwnedRow, RowConverter};
 use datafusion::physical_expr::{LexOrdering, PhysicalExpr};
 use std::sync::Arc;

+/// Partitioning scheme for distributing rows across shuffle output partitions.
 #[derive(Debug, Clone)]
 pub enum CometPartitioning {
     SinglePartition,
52 changes: 52 additions & 0 deletions native/shuffle/src/ipc.rs
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::RecordBatch;
+use arrow::ipc::reader::StreamReader;
+use datafusion::common::DataFusionError;
+use datafusion::error::Result;
+
+pub fn read_ipc_compressed(bytes: &[u8]) -> Result<RecordBatch> {
+    match &bytes[0..4] {
+        b"SNAP" => {
+            let decoder = snap::read::FrameDecoder::new(&bytes[4..]);
+            let mut reader =
+                unsafe { StreamReader::try_new(decoder, None)?.with_skip_validation(true) };
+            reader.next().unwrap().map_err(|e| e.into())
+        }
+        b"LZ4_" => {
+            let decoder = lz4_flex::frame::FrameDecoder::new(&bytes[4..]);
+            let mut reader =
+                unsafe { StreamReader::try_new(decoder, None)?.with_skip_validation(true) };
+            reader.next().unwrap().map_err(|e| e.into())
+        }
+        b"ZSTD" => {
+            let decoder = zstd::Decoder::new(&bytes[4..])?;
+            let mut reader =
+                unsafe { StreamReader::try_new(decoder, None)?.with_skip_validation(true) };
+            reader.next().unwrap().map_err(|e| e.into())
+        }
+        b"NONE" => {
+            let mut reader =
+                unsafe { StreamReader::try_new(&bytes[4..], None)?.with_skip_validation(true) };
+            reader.next().unwrap().map_err(|e| e.into())
+        }
+        other => Err(DataFusionError::Execution(format!(
+            "Failed to decode batch: invalid compression codec: {other:?}"
+        ))),
+    }
+}
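The reader dispatches on the first four bytes, a codec tag (`SNAP`, `LZ4_`, `ZSTD`, or `NONE`) that the write path prepends to the Arrow IPC stream. A minimal round-trip sketch for the uncompressed case, hand-framing the `NONE` tag instead of going through `ShuffleBlockWriter`; the schema and values are illustrative only:

use arrow::array::{ArrayRef, Int32Array, RecordBatch};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::ipc::writer::StreamWriter;
use datafusion_comet_shuffle::read_ipc_compressed;
use std::sync::Arc;

fn roundtrip_uncompressed() -> datafusion::error::Result<()> {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef],
    )?;

    // Frame an Arrow IPC stream with the 4-byte codec tag the reader expects.
    let mut buf = b"NONE".to_vec();
    {
        let mut writer = StreamWriter::try_new(&mut buf, &schema)?;
        writer.write(&batch)?;
        writer.finish()?;
    }

    assert_eq!(read_ipc_compressed(&buf)?, batch);
    Ok(())
}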
5 changes: 3 additions & 2 deletions native/shuffle/src/lib.rs
@@ -15,14 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.

-pub mod codec;
 pub(crate) mod comet_partitioning;
+pub mod ipc;
 pub(crate) mod metrics;
 pub(crate) mod partitioners;
 mod shuffle_writer;
 pub mod spark_unsafe;
 pub(crate) mod writers;

-pub use codec::{read_ipc_compressed, CompressionCodec, ShuffleBlockWriter};
 pub use comet_partitioning::CometPartitioning;
+pub use ipc::read_ipc_compressed;
 pub use shuffle_writer::ShuffleWriterExec;
+pub use writers::{CompressionCodec, ShuffleBlockWriter};
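With the re-exports above, the `codec` → `ipc`/`writers` split stays invisible to downstream crates: imports from the crate root are unchanged. A hypothetical caller:

// Downstream code keeps compiling: the crate root re-exports the same names
// even though codec.rs has been split into ipc.rs and writers.rs.
use datafusion_comet_shuffle::{read_ipc_compressed, CompressionCodec, ShuffleBlockWriter};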
1 change: 1 addition & 0 deletions native/shuffle/src/metrics.rs
@@ -19,6 +19,7 @@ use datafusion::physical_plan::metrics::{
     BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder, Time,
 };

+/// Execution metrics for a shuffle partition operation.
 pub(crate) struct ShufflePartitionerMetrics {
     /// metrics
     pub(crate) baseline: BaselineMetrics,
13 changes: 2 additions & 11 deletions native/shuffle/src/partitioners/mod.rs
@@ -18,18 +18,9 @@
 mod multi_partition;
 mod partitioned_batch_iterator;
 mod single_partition;
-
-use arrow::record_batch::RecordBatch;
-use datafusion::common::Result;
+mod traits;

 pub(crate) use multi_partition::MultiPartitionShuffleRepartitioner;
 pub(crate) use partitioned_batch_iterator::PartitionedBatchIterator;
 pub(crate) use single_partition::SinglePartitionShufflePartitioner;
-
-#[async_trait::async_trait]
-pub(crate) trait ShufflePartitioner: Send + Sync {
-    /// Insert a batch into the partitioner
-    async fn insert_batch(&mut self, batch: RecordBatch) -> Result<()>;
-    /// Write shuffle data and shuffle index file to disk
-    fn shuffle_write(&mut self) -> Result<()>;
-}
+pub(crate) use traits::ShufflePartitioner;
1 change: 1 addition & 0 deletions native/shuffle/src/partitioners/multi_partition.rs
@@ -39,6 +39,7 @@ use std::io::{BufReader, BufWriter, Seek, Write};
 use std::sync::Arc;
 use tokio::time::Instant;

+/// Reusable scratch buffers for computing row-to-partition assignments.
 #[derive(Default)]
 struct ScratchSpace {
     /// Hashes for each row in the current batch.
1 change: 1 addition & 0 deletions native/shuffle/src/partitioners/partitioned_batch_iterator.rs
@@ -50,6 +50,7 @@ impl PartitionedBatchesProducer {
     }
 }

+/// Iterates over the shuffled record batches belonging to a single output partition.
 pub(crate) struct PartitionedBatchIterator<'a> {
     record_batches: Vec<&'a RecordBatch>,
     batch_size: usize,
27 changes: 27 additions & 0 deletions native/shuffle/src/partitioners/traits.rs
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::record_batch::RecordBatch;
+use datafusion::common::Result;
+
+#[async_trait::async_trait]
+pub(crate) trait ShufflePartitioner: Send + Sync {
+    /// Insert a batch into the partitioner
+    async fn insert_batch(&mut self, batch: RecordBatch) -> Result<()>;
+    /// Write shuffle data and shuffle index file to disk
+    fn shuffle_write(&mut self) -> Result<()>;
+}
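For reference, an implementor of the extracted trait has the shape sketched below. This is a toy, crate-internal example (the trait is `pub(crate)`) and not part of this PR; the real implementations are `MultiPartitionShuffleRepartitioner` and `SinglePartitionShufflePartitioner`:

use arrow::record_batch::RecordBatch;
use datafusion::common::Result;

// Toy partitioner that buffers batches in memory and discards them on write.
struct NoopPartitioner {
    buffered: Vec<RecordBatch>,
}

#[async_trait::async_trait]
impl ShufflePartitioner for NoopPartitioner {
    async fn insert_batch(&mut self, batch: RecordBatch) -> Result<()> {
        self.buffered.push(batch);
        Ok(())
    }

    fn shuffle_write(&mut self) -> Result<()> {
        // A real implementation writes the shuffle data and index files here.
        self.buffered.clear();
        Ok(())
    }
}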
7 changes: 3 additions & 4 deletions native/shuffle/src/spark_unsafe/list.rs
@@ -17,10 +17,8 @@

 use crate::spark_unsafe::{
     map::append_map_elements,
-    row::{
-        append_field, downcast_builder_ref, impl_primitive_accessors, SparkUnsafeObject,
-        SparkUnsafeRow,
-    },
+    row::{append_field, downcast_builder_ref, SparkUnsafeRow},
+    unsafe_object::{impl_primitive_accessors, SparkUnsafeObject},
 };
 use arrow::array::{
     builder::{
@@ -86,6 +84,7 @@ macro_rules! impl_append_to_builder {
     };
 }

+/// A Spark `UnsafeArray` backed by JVM-allocated memory, providing element access by index.
 pub struct SparkUnsafeArray {
     row_addr: i64,
     num_elements: usize,
1 change: 1 addition & 0 deletions native/shuffle/src/spark_unsafe/map.rs
@@ -20,6 +20,7 @@ use arrow::array::builder::{ArrayBuilder, MapBuilder, MapFieldNames};
 use arrow::datatypes::{DataType, FieldRef};
 use datafusion_comet_jni_bridge::errors::CometError;

+/// A Spark `UnsafeMap` backed by JVM-allocated memory, containing parallel keys and values arrays.
 pub struct SparkUnsafeMap {
     pub(crate) keys: SparkUnsafeArray,
     pub(crate) values: SparkUnsafeArray,
1 change: 1 addition & 0 deletions native/shuffle/src/spark_unsafe/mod.rs
@@ -18,3 +18,4 @@
 pub mod list;
 mod map;
 pub mod row;
+pub mod unsafe_object;