From ac1ec47868d34cd269058ac810b8c485ea0b2e6e Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 10 Dec 2025 17:35:16 -0500 Subject: [PATCH 1/2] Fix null identity-partitioned cols after #1824. --- crates/iceberg/src/arrow/record_batch_transformer.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/crates/iceberg/src/arrow/record_batch_transformer.rs b/crates/iceberg/src/arrow/record_batch_transformer.rs index c4782464c1..6d99c2d128 100644 --- a/crates/iceberg/src/arrow/record_batch_transformer.rs +++ b/crates/iceberg/src/arrow/record_batch_transformer.rs @@ -83,14 +83,10 @@ fn constants_map( // Handle both None (null) and Some(Literal::Primitive) cases match &partition_data[pos] { None => { - // TODO (https://github.com/apache/iceberg-rust/issues/1914): Add support for null datum values. - return Err(Error::new( - ErrorKind::Unexpected, - format!( - "Partition field {} has null value for identity transform", - field.source_id - ), - )); + // Skip null partition values - they will be resolved as null per Iceberg spec rule #4. + // When a partition value is null, we don't add it to the constants map, + // allowing downstream column resolution to handle it correctly. + continue; } Some(Literal::Primitive(value)) => { // Create a Datum from the primitive type and value From edf549cc60afbde443a206852ae7f3d0e8786c13 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 10 Dec 2025 18:31:07 -0500 Subject: [PATCH 2/2] add test that fails on main --- .../src/arrow/record_batch_transformer.rs | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/crates/iceberg/src/arrow/record_batch_transformer.rs b/crates/iceberg/src/arrow/record_batch_transformer.rs index 6d99c2d128..439358435c 100644 --- a/crates/iceberg/src/arrow/record_batch_transformer.rs +++ b/crates/iceberg/src/arrow/record_batch_transformer.rs @@ -1606,4 +1606,73 @@ mod test { assert_eq!(get_string_value(result.column(4).as_ref(), 0), ""); assert_eq!(get_string_value(result.column(4).as_ref(), 1), ""); } + + /// Test handling of null values in identity-partitioned columns. + /// + /// Reproduces TestPartitionValues.testNullPartitionValue() from iceberg-java, which + /// writes records where the partition column has null values. Before the fix in #1922, + /// this would error with "Partition field X has null value for identity transform". + #[test] + fn null_identity_partition_value() { + use crate::spec::{Struct, Transform}; + + let schema = Arc::new( + Schema::builder() + .with_schema_id(0) + .with_fields(vec![ + NestedField::optional(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(2, "data", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build() + .unwrap(), + ); + + let partition_spec = Arc::new( + crate::spec::PartitionSpec::builder(schema.clone()) + .with_spec_id(0) + .add_partition_field("data", "data", Transform::Identity) + .unwrap() + .build() + .unwrap(), + ); + + // Partition has null value for the data column + let partition_data = Struct::from_iter(vec![None]); + + let file_schema = Arc::new(ArrowSchema::new(vec![simple_field( + "id", + DataType::Int32, + true, + "1", + )])); + + let projected_field_ids = [1, 2]; + + let mut transformer = RecordBatchTransformerBuilder::new(schema, &projected_field_ids) + .with_partition(partition_spec, partition_data) + .expect("Should handle null partition values") + .build(); + + let file_batch = + RecordBatch::try_new(file_schema, vec![Arc::new(Int32Array::from(vec![1, 2, 3]))]) + .unwrap(); + + let result = transformer.process_record_batch(file_batch).unwrap(); + + assert_eq!(result.num_columns(), 2); + assert_eq!(result.num_rows(), 3); + + let id_col = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(id_col.values(), &[1, 2, 3]); + + // Partition column with null value should produce nulls + let data_col = result.column(1); + assert!(data_col.is_null(0)); + assert!(data_col.is_null(1)); + assert!(data_col.is_null(2)); + } }