7 changes: 3 additions & 4 deletions native/core/src/execution/operators/shuffle_scan.rs
@@ -18,8 +18,7 @@
 use crate::{
     errors::CometError,
     execution::{
-        operators::ExecutionError, planner::TEST_EXEC_CONTEXT_ID,
-        shuffle::codec::read_ipc_compressed,
+        operators::ExecutionError, planner::TEST_EXEC_CONTEXT_ID, shuffle::ipc::read_ipc_compressed,
     },
     jvm_bridge::{jni_call, JVMClasses},
 };
@@ -352,15 +351,15 @@ impl RecordBatchStream for ShuffleScanStream

 #[cfg(test)]
 mod tests {
-    use crate::execution::shuffle::codec::{CompressionCodec, ShuffleBlockWriter};
+    use crate::execution::shuffle::{CompressionCodec, ShuffleBlockWriter};
     use arrow::array::{Int32Array, StringArray};
     use arrow::datatypes::{DataType, Field, Schema};
     use arrow::record_batch::RecordBatch;
     use datafusion::physical_plan::metrics::Time;
     use std::io::Cursor;
     use std::sync::Arc;

-    use crate::execution::shuffle::codec::read_ipc_compressed;
+    use crate::execution::shuffle::ipc::read_ipc_compressed;

     #[test]
     #[cfg_attr(miri, ignore)] // Miri cannot call FFI functions (zstd)
5 changes: 2 additions & 3 deletions native/shuffle/benches/row_columnar.rs
@@ -23,9 +23,8 @@

 use arrow::datatypes::{DataType as ArrowDataType, Field, Fields};
 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
-use datafusion_comet_shuffle::spark_unsafe::row::{
-    process_sorted_row_partition, SparkUnsafeObject, SparkUnsafeRow,
-};
+use datafusion_comet_shuffle::spark_unsafe::row::{process_sorted_row_partition, SparkUnsafeRow};
+use datafusion_comet_shuffle::spark_unsafe::unsafe_object::SparkUnsafeObject;
 use datafusion_comet_shuffle::CompressionCodec;
 use std::sync::Arc;
 use tempfile::Builder;
1 change: 1 addition & 0 deletions native/shuffle/src/comet_partitioning.rs
@@ -19,6 +19,7 @@ use arrow::row::{OwnedRow, RowConverter};
 use datafusion::physical_expr::{LexOrdering, PhysicalExpr};
 use std::sync::Arc;

+/// Partitioning scheme for distributing rows across shuffle output partitions.
 #[derive(Debug, Clone)]
 pub enum CometPartitioning {
     SinglePartition,
52 changes: 52 additions & 0 deletions native/shuffle/src/ipc.rs
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::RecordBatch;
+use arrow::ipc::reader::StreamReader;
+use datafusion::common::DataFusionError;
+use datafusion::error::Result;
+
+pub fn read_ipc_compressed(bytes: &[u8]) -> Result<RecordBatch> {
+    match &bytes[0..4] {
+        b"SNAP" => {
+            let decoder = snap::read::FrameDecoder::new(&bytes[4..]);
+            let mut reader =
+                unsafe { StreamReader::try_new(decoder, None)?.with_skip_validation(true) };
+            reader.next().unwrap().map_err(|e| e.into())
+        }
+        b"LZ4_" => {
+            let decoder = lz4_flex::frame::FrameDecoder::new(&bytes[4..]);
+            let mut reader =
+                unsafe { StreamReader::try_new(decoder, None)?.with_skip_validation(true) };
+            reader.next().unwrap().map_err(|e| e.into())
+        }
+        b"ZSTD" => {
+            let decoder = zstd::Decoder::new(&bytes[4..])?;
+            let mut reader =
+                unsafe { StreamReader::try_new(decoder, None)?.with_skip_validation(true) };
+            reader.next().unwrap().map_err(|e| e.into())
+        }
+        b"NONE" => {
+            let mut reader =
+                unsafe { StreamReader::try_new(&bytes[4..], None)?.with_skip_validation(true) };
+            reader.next().unwrap().map_err(|e| e.into())
+        }
+        other => Err(DataFusionError::Execution(format!(
+            "Failed to decode batch: invalid compression codec: {other:?}"
+        ))),
+    }
+}
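The reader dispatches on the first four bytes, a codec tag (`SNAP`, `LZ4_`, `ZSTD`, or `NONE`) that the write path prepends to the Arrow IPC stream. A minimal round-trip sketch for the uncompressed case, hand-framing the `NONE` tag instead of going through `ShuffleBlockWriter`; the schema and values are illustrative only:

use arrow::array::{ArrayRef, Int32Array, RecordBatch};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::ipc::writer::StreamWriter;
use datafusion_comet_shuffle::read_ipc_compressed;
use std::sync::Arc;

fn roundtrip_uncompressed() -> datafusion::error::Result<()> {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef],
    )?;

    // Frame an Arrow IPC stream with the 4-byte codec tag the reader expects.
    let mut buf = b"NONE".to_vec();
    {
        let mut writer = StreamWriter::try_new(&mut buf, &schema)?;
        writer.write(&batch)?;
        writer.finish()?;
    }

    assert_eq!(read_ipc_compressed(&buf)?, batch);
    Ok(())
}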
5 changes: 3 additions & 2 deletions native/shuffle/src/lib.rs
@@ -15,14 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.

-pub mod codec;
 pub(crate) mod comet_partitioning;
+pub mod ipc;
 pub(crate) mod metrics;
 pub(crate) mod partitioners;
 mod shuffle_writer;
 pub mod spark_unsafe;
 pub(crate) mod writers;

-pub use codec::{read_ipc_compressed, CompressionCodec, ShuffleBlockWriter};
 pub use comet_partitioning::CometPartitioning;
+pub use ipc::read_ipc_compressed;
 pub use shuffle_writer::ShuffleWriterExec;
+pub use writers::{CompressionCodec, ShuffleBlockWriter};
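With the re-exports above, the `codec` → `ipc`/`writers` split stays invisible to downstream crates: imports from the crate root are unchanged. A hypothetical caller:

// Downstream code keeps compiling: the crate root re-exports the same names
// even though codec.rs has been split into ipc.rs and writers.rs.
use datafusion_comet_shuffle::{read_ipc_compressed, CompressionCodec, ShuffleBlockWriter};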
1 change: 1 addition & 0 deletions native/shuffle/src/metrics.rs
@@ -19,6 +19,7 @@ use datafusion::physical_plan::metrics::{
     BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder, Time,
 };

+/// Execution metrics for a shuffle partition operation.
 pub(crate) struct ShufflePartitionerMetrics {
     /// metrics
     pub(crate) baseline: BaselineMetrics,
13 changes: 2 additions & 11 deletions native/shuffle/src/partitioners/mod.rs
@@ -18,18 +18,9 @@
 mod multi_partition;
 mod partitioned_batch_iterator;
 mod single_partition;
-
-use arrow::record_batch::RecordBatch;
-use datafusion::common::Result;
+mod traits;

 pub(crate) use multi_partition::MultiPartitionShuffleRepartitioner;
 pub(crate) use partitioned_batch_iterator::PartitionedBatchIterator;
 pub(crate) use single_partition::SinglePartitionShufflePartitioner;
-
-#[async_trait::async_trait]
-pub(crate) trait ShufflePartitioner: Send + Sync {
-    /// Insert a batch into the partitioner
-    async fn insert_batch(&mut self, batch: RecordBatch) -> Result<()>;
-    /// Write shuffle data and shuffle index file to disk
-    fn shuffle_write(&mut self) -> Result<()>;
-}
+pub(crate) use traits::ShufflePartitioner;
1 change: 1 addition & 0 deletions native/shuffle/src/partitioners/multi_partition.rs
@@ -39,6 +39,7 @@ use std::io::{BufReader, BufWriter, Seek, Write};
 use std::sync::Arc;
 use tokio::time::Instant;

+/// Reusable scratch buffers for computing row-to-partition assignments.
 #[derive(Default)]
 struct ScratchSpace {
     /// Hashes for each row in the current batch.
1 change: 1 addition & 0 deletions native/shuffle/src/partitioners/partitioned_batch_iterator.rs
@@ -50,6 +50,7 @@ impl PartitionedBatchesProducer {
     }
 }

+/// Iterates over the shuffled record batches belonging to a single output partition.
 pub(crate) struct PartitionedBatchIterator<'a> {
     record_batches: Vec<&'a RecordBatch>,
     batch_size: usize,
27 changes: 27 additions & 0 deletions native/shuffle/src/partitioners/traits.rs
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::record_batch::RecordBatch;
+use datafusion::common::Result;
+
+#[async_trait::async_trait]
+pub(crate) trait ShufflePartitioner: Send + Sync {
+    /// Insert a batch into the partitioner
+    async fn insert_batch(&mut self, batch: RecordBatch) -> Result<()>;
+    /// Write shuffle data and shuffle index file to disk
+    fn shuffle_write(&mut self) -> Result<()>;
+}
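For reference, an implementor of the extracted trait has the shape sketched below. This is a toy, crate-internal example (the trait is `pub(crate)`) and not part of this PR; the real implementations are `MultiPartitionShuffleRepartitioner` and `SinglePartitionShufflePartitioner`:

use arrow::record_batch::RecordBatch;
use datafusion::common::Result;

// Toy partitioner that buffers batches in memory and discards them on write.
struct NoopPartitioner {
    buffered: Vec<RecordBatch>,
}

#[async_trait::async_trait]
impl ShufflePartitioner for NoopPartitioner {
    async fn insert_batch(&mut self, batch: RecordBatch) -> Result<()> {
        self.buffered.push(batch);
        Ok(())
    }

    fn shuffle_write(&mut self) -> Result<()> {
        // A real implementation writes the shuffle data and index files here.
        self.buffered.clear();
        Ok(())
    }
}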
7 changes: 3 additions & 4 deletions native/shuffle/src/spark_unsafe/list.rs
@@ -17,10 +17,8 @@

 use crate::spark_unsafe::{
     map::append_map_elements,
-    row::{
-        append_field, downcast_builder_ref, impl_primitive_accessors, SparkUnsafeObject,
-        SparkUnsafeRow,
-    },
+    row::{append_field, downcast_builder_ref, SparkUnsafeRow},
+    unsafe_object::{impl_primitive_accessors, SparkUnsafeObject},
 };
 use arrow::array::{
     builder::{
@@ -86,6 +84,7 @@ macro_rules! impl_append_to_builder {
     };
 }

+/// A Spark `UnsafeArray` backed by JVM-allocated memory, providing element access by index.
 pub struct SparkUnsafeArray {
     row_addr: i64,
     num_elements: usize,
1 change: 1 addition & 0 deletions native/shuffle/src/spark_unsafe/map.rs
@@ -20,6 +20,7 @@ use arrow::array::builder::{ArrayBuilder, MapBuilder, MapFieldNames};
 use arrow::datatypes::{DataType, FieldRef};
 use datafusion_comet_jni_bridge::errors::CometError;

+/// A Spark `UnsafeMap` backed by JVM-allocated memory, containing parallel keys and values arrays.
 pub struct SparkUnsafeMap {
     pub(crate) keys: SparkUnsafeArray,
     pub(crate) values: SparkUnsafeArray,
1 change: 1 addition & 0 deletions native/shuffle/src/spark_unsafe/mod.rs
@@ -18,3 +18,4 @@
 pub mod list;
 mod map;
 pub mod row;
+pub mod unsafe_object;