Merged: 38 commits (all by mbutrovich)
39c3c4e  stash (Mar 4, 2026)
0edf710  add iceberg and opendal as features (Mar 4, 2026)
c96422e  cargo fmt (Mar 4, 2026)
eca9b33  clippy fixes (Mar 4, 2026)
b5ff641  bump to iceberg-rust df53 branch, clippy fixes (Mar 4, 2026)
4258300  bump to iceberg-rust df53 branch, clippy fixes (Mar 4, 2026)
efa9437  fix fileIO construction (Mar 4, 2026)
a548871  update deps (Mar 4, 2026)
f466c4b  Merge branch 'main' into df53 (Mar 4, 2026)
faf1c56  Bump to 53.0.0-rc2. (Mar 17, 2026)
a31c1a3  Merge branch 'main' into df53 (Mar 17, 2026)
5a70cc9  Merge in upstream/main. (Mar 17, 2026)
eb3198f  Fix native/core/Cargo.toml. (Mar 17, 2026)
0de1381  Fix native test failures, clippy. (Mar 17, 2026)
7257a24  Update to use object_store 0.13 in hdfs.rs. (Mar 17, 2026)
43059fd  Fix memory pool issues. (Mar 17, 2026)
1794cfd  Fix type coercion for Utf8View stuff. (Mar 17, 2026)
6cfe190  Bump to released crates. (Mar 31, 2026)
3b40b81  bump to iceberg-rust main commit with df53 (Mar 31, 2026)
eb64e29  Merge remote-tracking branch 'apache/main' into df53 (Mar 31, 2026)
a359213  putting missing file back (Mar 31, 2026)
88e4bf4  fix (Mar 31, 2026)
de3faa0  fix native test (Mar 31, 2026)
1105bb2  workaround array_compact and array_repeat failures (Apr 1, 2026)
a2f5257  workaround array_compact and array_repeat failures (Apr 1, 2026)
7684997  bump opendal to commit on main with upgraded object_store. (Apr 1, 2026)
9d6f776  Merge branch 'main' into df53 (Apr 1, 2026)
28d98e2  Merge branch 'main' into df53 (Apr 3, 2026)
1413355  cargo update (Apr 3, 2026)
2be791c  fix mapping issue with native_datafusion (Apr 3, 2026)
51ebbc3  make test consistent with others in the file (Apr 3, 2026)
18efeb4  add fallback for SPARK-39393 test. (Apr 3, 2026)
38dc46a  update docs (Apr 3, 2026)
c73db1f  fix spotless. (Apr 3, 2026)
b3e2785  Merge branch 'main' into df53 (Apr 3, 2026)
e84b109  Merge branch 'main' into df53 (Apr 6, 2026)
e518d16  rename from PR feedback (Apr 7, 2026)
e144ff3  Merge branch 'main' into df53 (Apr 7, 2026)

862 changes: 539 additions & 323 deletions native/Cargo.lock

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions native/Cargo.toml
@@ -34,14 +34,14 @@ edition = "2021"
rust-version = "1.88"

[workspace.dependencies]
-arrow = { version = "57.3.0", features = ["prettyprint", "ffi", "chrono-tz"] }
+arrow = { version = "58.1.0", features = ["prettyprint", "ffi", "chrono-tz"] }
async-trait = { version = "0.1" }
bytes = { version = "1.11.1" }
-parquet = { version = "57.3.0", default-features = false, features = ["experimental"] }
-datafusion = { version = "52.4.0", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] }
-datafusion-datasource = { version = "52.4.0" }
-datafusion-physical-expr-adapter = { version = "52.4.0" }
-datafusion-spark = { version = "52.4.0" }
+parquet = { version = "58.1.0", default-features = false, features = ["experimental"] }
+datafusion = { version = "53.0.0", default-features = false, features = ["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] }
+datafusion-datasource = { version = "53.0.0" }
+datafusion-physical-expr-adapter = { version = "53.0.0" }
+datafusion-spark = { version = "53.0.0", features = ["core"] }
datafusion-comet-spark-expr = { path = "spark-expr" }
datafusion-comet-common = { path = "common" }
datafusion-comet-jni-bridge = { path = "jni-bridge" }
@@ -54,12 +54,12 @@ num = "0.4"
rand = "0.10"
regex = "1.12.3"
thiserror = "2"
-object_store = { version = "0.12.3", features = ["gcp", "azure", "aws", "http"] }
+object_store = { version = "0.13.1", features = ["gcp", "azure", "aws", "http"] }
url = "2.2"
aws-config = "1.8.14"
aws-credential-types = "1.2.13"
-iceberg = { git = "https://github.com/apache/iceberg-rust", tag = "v0.9.0-rc.1" }
-iceberg-storage-opendal = { git = "https://github.com/apache/iceberg-rust", tag = "v0.9.0-rc.1", features = ["opendal-all"] }
+iceberg = { git = "https://github.com/apache/iceberg-rust", rev = "477a1e5" }
+iceberg-storage-opendal = { git = "https://github.com/apache/iceberg-rust", rev = "477a1e5", features = ["opendal-all"] }

[profile.release]
debug = true
6 changes: 3 additions & 3 deletions native/core/Cargo.toml
@@ -70,9 +70,9 @@ aws-credential-types = { workspace = true }
parking_lot = "0.12.5"
datafusion-comet-objectstore-hdfs = { path = "../hdfs", optional = true, default-features = false, features = ["hdfs"] }
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-native-roots", "http2"] }
-object_store_opendal = {version = "0.55.0", optional = true}
+object_store_opendal = { git = "https://github.com/apache/opendal", rev = "173feb6", package = "object_store_opendal", optional = true}
hdfs-sys = {version = "0.3", optional = true, features = ["hdfs_3_3"]}
-opendal = { version ="0.55.0", optional = true, features = ["services-hdfs"] }
+opendal = { git = "https://github.com/apache/opendal", rev = "173feb6", optional = true, features = ["services-hdfs"] }
iceberg = { workspace = true }
iceberg-storage-opendal = { workspace = true }
serde_json = "1.0"
@@ -91,7 +91,7 @@ jni = { version = "0.22.4", features = ["invocation"] }
lazy_static = "1.4"
assertables = "9"
hex = "0.4.3"
-datafusion-functions-nested = { version = "52.4.0" }
+datafusion-functions-nested = { version = "53.0.0" }

[features]
backtrace = ["datafusion/backtrace"]
5 changes: 5 additions & 0 deletions native/core/src/execution/jni_api.rs
@@ -393,6 +393,11 @@ fn prepare_datafusion_session_context(

// register UDFs from datafusion-spark crate
fn register_datafusion_spark_function(session_ctx: &SessionContext) {
+    // Don't register SparkArrayRepeat — it returns NULL when the element is NULL
+    // (e.g. array_repeat(null, 3) returns NULL instead of [null, null, null]).
+    // Comet's Scala serde wraps the call in a CaseWhen for null count handling,
+    // so DataFusion's built-in ArrayRepeat is sufficient.
+    // TODO: file upstream issue against datafusion-spark
Reviewer comment (Contributor): Is this one of the datafusion-spark built-in issues? Perhaps we can just fall back in this case, like we do for other Spark built-in functions?

    session_ctx.register_udf(ScalarUDF::new_from_impl(SparkExpm1::default()));
    session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSha2::default()));
    session_ctx.register_udf(ScalarUDF::new_from_impl(CharFunc::default()));
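The comment in this hunk relies on DataFusion's built-in array_repeat preserving Spark's null-element semantics. A minimal sketch of that expectation (illustrative, not part of the PR; assumes the datafusion and tokio crates, with DataFusion's nested-array functions registered in the default SessionContext):

```rust
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // Expected per the comment above: repeating a NULL element yields an array
    // of NULLs, e.g. [NULL, NULL, NULL], not a top-level NULL result.
    let df = ctx
        .sql("SELECT array_repeat(CAST(NULL AS INT), 3) AS repeated")
        .await?;
    df.show().await?;
    Ok(())
}
```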
28 changes: 18 additions & 10 deletions native/core/src/execution/memory_pools/fair_pool.rs
@@ -103,16 +103,21 @@ impl MemoryPool for CometFairMemoryPool
            .expect("unexpected amount of unregister happened");
    }

-    fn grow(&self, reservation: &MemoryReservation, additional: usize) {
-        self.try_grow(reservation, additional).unwrap();
+    fn grow(&self, _reservation: &MemoryReservation, additional: usize) {
+        self.try_grow(_reservation, additional).unwrap();
    }

-    fn shrink(&self, reservation: &MemoryReservation, subtractive: usize) {
+    fn shrink(&self, _reservation: &MemoryReservation, subtractive: usize) {
        if subtractive > 0 {
            let mut state = self.state.lock();
-            let size = reservation.size();
-            if size < subtractive {
-                panic!("Failed to release {subtractive} bytes where only {size} bytes reserved")
+            // We don't use reservation.size() here because DataFusion 53+ decrements
+            // the reservation's atomic size before calling pool.shrink(), so it would
+            // reflect the post-shrink value rather than the pre-shrink value.
+            if state.used < subtractive {
+                panic!(
+                    "Failed to release {subtractive} bytes where only {} bytes tracked by pool",
+                    state.used
+                )
            }
            self.release(subtractive)
                .unwrap_or_else(|_| panic!("Failed to release {subtractive} bytes"));
@@ -122,7 +127,7 @@ impl MemoryPool for CometFairMemoryPool {

    fn try_grow(
        &self,
-        reservation: &MemoryReservation,
+        _reservation: &MemoryReservation,
        additional: usize,
    ) -> Result<(), DataFusionError> {
        if additional > 0 {
@@ -132,10 +137,13 @@ impl MemoryPool for CometFairMemoryPool {
                .pool_size
                .checked_div(num)
                .expect("overflow in checked_div");
-            let size = reservation.size();
-            if limit < size + additional {
+            // We use state.used instead of reservation.size() because DataFusion 53+
+            // calls pool.try_grow() before incrementing the reservation's atomic size,
+            // so reservation.size() would not include prior grows.
+            let used = state.used;
+            if limit < used + additional {
                return resources_err!(
-                    "Failed to acquire {additional} bytes where {size} bytes already reserved and the fair limit is {limit} bytes, {num} registered"
+                    "Failed to acquire {additional} bytes where {used} bytes already reserved and the fair limit is {limit} bytes, {num} registered"
Reviewer comment (Contributor): nit: should "already reserved" be "already used"?

                );
            }

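For context, the accounting pattern adopted above can be sketched without Comet's actual CometFairMemoryPool. The struct and method names below are illustrative, assuming the DataFusion 53 ordering described in the new comments (try_grow runs before the reservation's size is incremented, shrink after it has already been decremented):

```rust
use std::sync::Mutex;

struct PoolState {
    used: usize,
    pool_size: usize,
}

// Hypothetical pool that mirrors the pattern: track bytes in the pool's own
// state so grow/shrink checks never depend on the reservation's counter.
struct TrackingPool {
    state: Mutex<PoolState>,
}

impl TrackingPool {
    fn try_grow(&self, additional: usize) -> Result<(), String> {
        let mut state = self.state.lock().unwrap();
        // Check against the pool's own counter; the caller's reservation has
        // not been updated yet when this is called.
        if state.used + additional > state.pool_size {
            return Err(format!(
                "cannot acquire {additional} bytes: {} of {} already used",
                state.used, state.pool_size
            ));
        }
        state.used += additional;
        Ok(())
    }

    fn shrink(&self, subtractive: usize) {
        let mut state = self.state.lock().unwrap();
        // Validate against the pool's counter, not the reservation, which has
        // already been decremented by the time shrink() runs.
        assert!(
            state.used >= subtractive,
            "released {subtractive} bytes but only {} tracked",
            state.used
        );
        state.used -= subtractive;
    }
}
```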
8 changes: 4 additions & 4 deletions native/core/src/execution/operators/expand.rs
@@ -42,7 +42,7 @@ pub struct ExpandExec {
    projections: Vec<Vec<Arc<dyn PhysicalExpr>>>,
    child: Arc<dyn ExecutionPlan>,
    schema: SchemaRef,
-    cache: PlanProperties,
+    cache: Arc<PlanProperties>,
}

impl ExpandExec {
@@ -52,12 +52,12 @@ impl ExpandExec {
        child: Arc<dyn ExecutionPlan>,
        schema: SchemaRef,
    ) -> Self {
-        let cache = PlanProperties::new(
+        let cache = Arc::new(PlanProperties::new(
            EquivalenceProperties::new(Arc::clone(&schema)),
            Partitioning::UnknownPartitioning(1),
            EmissionType::Final,
            Boundedness::Bounded,
-        );
+        ));

        Self {
            projections,
@@ -129,7 +129,7 @@ impl ExecutionPlan for ExpandExec {
        Ok(Box::pin(expand_stream))
    }

-    fn properties(&self) -> &PlanProperties {
+    fn properties(&self) -> &Arc<PlanProperties> {
        &self.cache
    }

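The same change repeats in the operators below: PlanProperties is now built once, wrapped in an Arc, and returned by reference from properties(). A minimal sketch of the shape (import paths and the exact DataFusion 53 signature are assumed from these diffs rather than verified against the release):

```rust
use std::sync::Arc;

use arrow::datatypes::SchemaRef;
use datafusion::physical_expr::EquivalenceProperties;
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion::physical_plan::{Partitioning, PlanProperties};

struct MyExec {
    // Cached once at construction; cloning the Arc is cheap for callers.
    cache: Arc<PlanProperties>,
}

impl MyExec {
    fn new(schema: SchemaRef) -> Self {
        let cache = Arc::new(PlanProperties::new(
            EquivalenceProperties::new(Arc::clone(&schema)),
            Partitioning::UnknownPartitioning(1),
            EmissionType::Final,
            Boundedness::Bounded,
        ));
        Self { cache }
    }

    // Mirrors the updated trait method signature used throughout this PR.
    fn properties(&self) -> &Arc<PlanProperties> {
        &self.cache
    }
}
```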
12 changes: 6 additions & 6 deletions native/core/src/execution/operators/iceberg_scan.rs
@@ -58,7 +58,7 @@ pub struct IcebergScanExec {
    /// Output schema after projection
    output_schema: SchemaRef,
    /// Cached execution plan properties
-    plan_properties: PlanProperties,
+    plan_properties: Arc<PlanProperties>,
    /// Catalog-specific configuration for FileIO
    catalog_properties: HashMap<String, String>,
    /// Pre-planned file scan tasks
@@ -93,13 +93,13 @@ impl IcebergScanExec {
        })
    }

-    fn compute_properties(schema: SchemaRef, num_partitions: usize) -> PlanProperties {
-        PlanProperties::new(
+    fn compute_properties(schema: SchemaRef, num_partitions: usize) -> Arc<PlanProperties> {
+        Arc::new(PlanProperties::new(
            EquivalenceProperties::new(schema),
            Partitioning::UnknownPartitioning(num_partitions),
            EmissionType::Incremental,
            Boundedness::Bounded,
-        )
+        ))
    }
}

@@ -116,7 +116,7 @@ impl ExecutionPlan for IcebergScanExec {
        Arc::clone(&self.output_schema)
    }

-    fn properties(&self) -> &PlanProperties {
+    fn properties(&self) -> &Arc<PlanProperties> {
        &self.plan_properties
    }

@@ -288,7 +288,7 @@ where
            _ => {
                let adapter = self
                    .adapter_factory
-                    .create(Arc::clone(&self.schema), Arc::clone(&file_schema));
+                    .create(Arc::clone(&self.schema), Arc::clone(&file_schema))?;
                let exprs =
                    build_projection_expressions(&self.schema, &adapter).map_err(|e| {
                        DataFusionError::Execution(format!(
40 changes: 24 additions & 16 deletions native/core/src/execution/operators/parquet_writer.rs
@@ -23,16 +23,18 @@ use std::{
    fmt,
    fmt::{Debug, Formatter},
    fs::File,
-    io::Cursor,
    sync::Arc,
};

+#[cfg(feature = "hdfs-opendal")]
use opendal::Operator;
+#[cfg(feature = "hdfs-opendal")]
+use std::io::Cursor;

use crate::execution::shuffle::CompressionCodec;
-use crate::parquet::parquet_support::{
-    create_hdfs_operator, is_hdfs_scheme, prepare_object_store_with_configs,
-};
+use crate::parquet::parquet_support::is_hdfs_scheme;
+#[cfg(feature = "hdfs-opendal")]
+use crate::parquet::parquet_support::{create_hdfs_operator, prepare_object_store_with_configs};
use arrow::datatypes::{Schema, SchemaRef};
use arrow::record_batch::RecordBatch;
use async_trait::async_trait;
@@ -45,7 +47,7 @@ use datafusion::{
        metrics::{ExecutionPlanMetricsSet, MetricsSet},
        stream::RecordBatchStreamAdapter,
        DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties,
-        SendableRecordBatchStream, Statistics,
+        SendableRecordBatchStream,
    },
};
use futures::TryStreamExt;
@@ -64,6 +66,7 @@ enum ParquetWriter {
    /// Contains the arrow writer, HDFS operator, and destination path
    /// an Arrow writer writes to in-memory buffer the data converted to Parquet format
    /// The opendal::Writer is created lazily on first write
+    #[cfg(feature = "hdfs-opendal")]
    Remote(
        ArrowWriter<Cursor<Vec<u8>>>,
        Option<opendal::Writer>,
@@ -80,6 +83,7 @@ impl ParquetWriter {
    ) -> std::result::Result<(), parquet::errors::ParquetError> {
        match self {
            ParquetWriter::LocalFile(writer) => writer.write(batch),
+            #[cfg(feature = "hdfs-opendal")]
            ParquetWriter::Remote(
                arrow_parquet_buffer_writer,
                hdfs_writer_opt,
@@ -134,6 +138,7 @@ impl ParquetWriter {
                writer.close()?;
                Ok(())
            }
+            #[cfg(feature = "hdfs-opendal")]
            ParquetWriter::Remote(
                arrow_parquet_buffer_writer,
                mut hdfs_writer_opt,
@@ -208,7 +213,7 @@ pub struct ParquetWriterExec {
    /// Metrics
    metrics: ExecutionPlanMetricsSet,
    /// Cache for plan properties
-    cache: PlanProperties,
+    cache: Arc<PlanProperties>,
}

impl ParquetWriterExec {
@@ -228,12 +233,12 @@ impl ParquetWriterExec {
        // Preserve the input's partitioning so each partition writes its own file
        let input_partitioning = input.output_partitioning().clone();

-        let cache = PlanProperties::new(
+        let cache = Arc::new(PlanProperties::new(
            EquivalenceProperties::new(Arc::clone(&input.schema())),
            input_partitioning,
            EmissionType::Final,
            Boundedness::Bounded,
-        );
+        ));

        Ok(ParquetWriterExec {
            input,
@@ -275,7 +280,7 @@ impl ParquetWriterExec {
        output_file_path: &str,
        schema: SchemaRef,
        props: WriterProperties,
-        runtime_env: Arc<datafusion::execution::runtime_env::RuntimeEnv>,
+        _runtime_env: Arc<datafusion::execution::runtime_env::RuntimeEnv>,
        object_store_options: &HashMap<String, String>,
    ) -> Result<ParquetWriter> {
        // Parse URL and match on storage scheme directly
@@ -284,11 +289,11 @@
        })?;

        if is_hdfs_scheme(&url, object_store_options) {
-            // HDFS storage
+            #[cfg(feature = "hdfs-opendal")]
+            {
            // Use prepare_object_store_with_configs to create and register the object store
            let (_object_store_url, object_store_path) = prepare_object_store_with_configs(
-                runtime_env,
+                _runtime_env,
                output_file_path.to_string(),
                object_store_options,
            )
@@ -324,6 +329,12 @@ impl ParquetWriterExec {
                object_store_path.to_string(),
            ))
            }
+            #[cfg(not(feature = "hdfs-opendal"))]
+            {
+                Err(DataFusionError::Execution(
+                    "HDFS support is not enabled. Rebuild with the 'hdfs-opendal' feature.".into(),
+                ))
+            }
        } else if output_file_path.starts_with("file://")
            || output_file_path.starts_with("file:")
            || !output_file_path.contains("://")
@@ -405,11 +416,7 @@ impl ExecutionPlan for ParquetWriterExec {
        Some(self.metrics.clone_inner())
    }

-    fn statistics(&self) -> Result<Statistics> {
-        self.input.partition_statistics(None)
-    }
-
-    fn properties(&self) -> &PlanProperties {
+    fn properties(&self) -> &Arc<PlanProperties> {
        &self.cache
    }

@@ -576,6 +583,7 @@ mod tests {

    /// Helper function to create a test RecordBatch with 1000 rows of (int, string) data
    /// Example batch_id 1 -> 0..1000, 2 -> 1001..2000
+    #[allow(dead_code)]
    fn create_test_record_batch(batch_id: i32) -> Result<RecordBatch> {
        assert!(batch_id > 0, "batch_id must be greater than 0");
        let num_rows = batch_id * 1000;
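The hdfs-opendal gating in this file follows a common Rust shape: the remote-writer variant and its constructor only exist when the feature is compiled in, while other builds return a descriptive runtime error. A simplified, hypothetical sketch (types, fields, and messages are illustrative, not the PR's actual ParquetWriter):

```rust
// Simplified stand-ins for the writer variants: the real operator holds an
// ArrowWriter<Cursor<Vec<u8>>> plus a lazily created opendal::Writer, but a byte
// buffer and destination path keep this sketch dependency-free.
enum Writer {
    Local(std::fs::File),
    #[cfg(feature = "hdfs-opendal")]
    Remote(Vec<u8>, String),
}

#[cfg(feature = "hdfs-opendal")]
fn open_hdfs_writer(path: &str) -> Result<Writer, String> {
    // A real implementation would create the opendal Operator/Writer here.
    Ok(Writer::Remote(Vec::new(), path.to_string()))
}

#[cfg(not(feature = "hdfs-opendal"))]
fn open_hdfs_writer(path: &str) -> Result<Writer, String> {
    // Mirrors the PR's behavior: builds without the feature fail at runtime
    // with a clear message instead of failing to compile.
    Err(format!(
        "cannot write {path}: HDFS support requires the 'hdfs-opendal' feature"
    ))
}

fn open_writer(path: &str, is_hdfs: bool) -> Result<Writer, String> {
    if is_hdfs {
        open_hdfs_writer(path)
    } else {
        std::fs::File::create(path)
            .map(Writer::Local)
            .map_err(|e| e.to_string())
    }
}
```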
8 changes: 4 additions & 4 deletions native/core/src/execution/operators/scan.rs
@@ -70,7 +70,7 @@ pub struct ScanExec {
    /// It is also used in unit test to mock the input data from JVM.
    pub batch: Arc<Mutex<Option<InputBatch>>>,
    /// Cache of expensive-to-compute plan properties
-    cache: PlanProperties,
+    cache: Arc<PlanProperties>,
    /// Metrics collector
    metrics: ExecutionPlanMetricsSet,
    /// Baseline metrics
@@ -93,14 +93,14 @@ impl ScanExec {
        // Build schema directly from data types since get_next now always unpacks dictionaries
        let schema = schema_from_data_types(&data_types);

-        let cache = PlanProperties::new(
+        let cache = Arc::new(PlanProperties::new(
            EquivalenceProperties::new(Arc::clone(&schema)),
            // The partitioning is not important because we are not using DataFusion's
            // query planner or optimizer
            Partitioning::UnknownPartitioning(1),
            EmissionType::Final,
            Boundedness::Bounded,
-        );
+        ));

        Ok(Self {
            exec_context_id,
@@ -415,7 +415,7 @@ impl ExecutionPlan for ScanExec {
        )))
    }

-    fn properties(&self) -> &PlanProperties {
+    fn properties(&self) -> &Arc<PlanProperties> {
        &self.cache
    }
