From 47c0012841bb135f202fba7523e78e848f6557b2 Mon Sep 17 00:00:00 2001 From: Elissa Z Date: Fri, 3 Apr 2026 03:46:36 +0000 Subject: [PATCH] fix: quote column names in merge_insert join to preserve case sensitivity DataFusion's DataFrame::join() lowercases unquoted identifiers, which causes merge_insert to fail when column names contain uppercase letters (e.g. "Stock", "Date"). The join key "target_Stock" is lowercased to "target_stock" internally, but the schema retains "target_Stock", causing a schema mismatch error: No field named target_stock. Did you mean '?table?.Stock'? Fix: wrap join column names in double quotes so DataFusion treats them as quoted identifiers and preserves their original casing. This is consistent with how prefix_columns() already quotes column names when aliasing. Affects both the FullCompatible and Subschema join paths in create_full_table_joined_stream(). The indexed path (create_indexed_joined_stream) is not affected because it uses Column::new_with_schema() which does direct schema lookup. --- rust/lance/src/dataset/write/merge_insert.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/rust/lance/src/dataset/write/merge_insert.rs b/rust/lance/src/dataset/write/merge_insert.rs index 290a83fb707..493336b136e 100644 --- a/rust/lance/src/dataset/write/merge_insert.rs +++ b/rust/lance/src/dataset/write/merge_insert.rs @@ -790,17 +790,21 @@ impl MergeInsertJob { let session_ctx = SessionContext::new_with_config(session_config); let schema = source.schema(); let new_data = session_ctx.read_one_shot(source)?; + // Quote column names to preserve case sensitivity — DataFusion's + // DataFrame::join() lowercases unquoted identifiers, which breaks + // merge_insert when column names contain uppercase letters (e.g. "Stock"). 
let join_cols = self .params - .on // columns to join on + .on .iter() - .map(|c| c.as_str()) - .collect::<Vec<_>>(); // vector of strings of col names to join + .map(|c| format!("\"{}\"", c)) + .collect::<Vec<_>>(); + let join_cols = join_cols.iter().map(|s| s.as_str()).collect::<Vec<_>>(); let target_cols = self .params .on .iter() - .map(|c| format!("target_{}", c)) + .map(|c| format!("\"target_{}\"", c)) .collect::<Vec<_>>(); let target_cols = target_cols.iter().map(|s| s.as_str()).collect::<Vec<_>>();