From 712a2eb7f82f24a74178321e6e12715061a31bc0 Mon Sep 17 00:00:00 2001 From: yanghua Date: Sun, 28 Sep 2025 19:51:48 +0800 Subject: [PATCH 01/17] refactor: rename Update operation fields --- java/lance-jni/src/transaction.rs | 32 +++---- .../com/lancedb/lance/operation/Update.java | 44 +++++----- python/python/lance/dataset.py | 19 ++-- python/src/transaction.rs | 24 ++--- rust/lance/src/dataset/transaction.rs | 88 ++++++++++++------- rust/lance/src/dataset/write/commit.rs | 4 +- rust/lance/src/dataset/write/merge_insert.rs | 10 +-- .../dataset/write/merge_insert/exec/write.rs | 4 +- rust/lance/src/dataset/write/update.rs | 13 ++- rust/lance/src/io/commit/conflict_resolver.rs | 36 ++++---- 10 files changed, 150 insertions(+), 124 deletions(-) diff --git a/java/lance-jni/src/transaction.rs b/java/lance-jni/src/transaction.rs index f9ce719f369..731b2a7ced3 100644 --- a/java/lance-jni/src/transaction.rs +++ b/java/lance-jni/src/transaction.rs @@ -495,9 +495,9 @@ fn convert_to_java_operation_inner<'local>( removed_fragment_ids, updated_fragments, new_fragments, - fields_modified, + bitmap_prune_field_ids, mem_wal_to_merge: _, - fields_for_preserving_frag_bitmap, + bitmap_preserve_exclude_field_ids, update_mode, } => { let removed_ids: Vec> = removed_fragment_ids @@ -507,9 +507,9 @@ fn convert_to_java_operation_inner<'local>( let removed_fragment_ids_obj = export_vec(env, &removed_ids)?; let updated_fragments_obj = export_vec(env, &updated_fragments)?; let new_fragments_obj = export_vec(env, &new_fragments)?; - let fields_modified = JLance(fields_modified.clone()).into_java(env)?; - let fields_for_preserving_frag_bitmap = - JLance(fields_for_preserving_frag_bitmap.clone()).into_java(env)?; + let bitmap_prune_field_ids = JLance(bitmap_prune_field_ids.clone()).into_java(env)?; + let bitmap_preserve_exclude_field_ids = + JLance(bitmap_preserve_exclude_field_ids.clone()).into_java(env)?; let update_mode = match update_mode { Some(update_mode) => update_mode.into_java(env), None => Ok(JObject::null()), @@ -529,8 +529,8 @@ fn convert_to_java_operation_inner<'local>( JValue::Object(&removed_fragment_ids_obj), JValue::Object(&updated_fragments_obj), JValue::Object(&new_fragments_obj), - JValueGen::Object(&fields_modified), - JValueGen::Object(&fields_for_preserving_frag_bitmap), + JValueGen::Object(&bitmap_prune_field_ids), + JValueGen::Object(&bitmap_preserve_exclude_field_ids), JValue::Object(&update_mode_optional), ], )?) @@ -939,16 +939,16 @@ fn convert_to_rust_operation( fragment.extract_object(env) })?; - let fields_modified = env - .call_method(java_operation, "fieldsModified", "()[J", &[])? + let bitmap_prune_field_ids = env + .call_method(java_operation, "bitmapPruneFieldIds", "()[J", &[])? .l()?; - let fields_modified = JLongArray::from(fields_modified).extract_object(env)?; + let bitmap_prune_field_ids = JLongArray::from(bitmap_prune_field_ids).extract_object(env)?; - let fields_for_preserving_frag_bitmap = env - .call_method(java_operation, "fieldsForPreservingFragBitmap", "()[J", &[])? + let bitmap_preserve_exclude_field_ids = env + .call_method(java_operation, "bitmapPreserveExcludeFieldIds", "()[J", &[])? .l()?; - let fields_for_preserving_frag_bitmap = - JLongArray::from(fields_for_preserving_frag_bitmap).extract_object(env)?; + let bitmap_preserve_exclude_field_ids = + JLongArray::from(bitmap_preserve_exclude_field_ids).extract_object(env)?; let update_mode: Option = env.get_optional_from_method(java_operation, "updateMode", |env, update_mode| { @@ -959,9 +959,9 @@ fn convert_to_rust_operation( removed_fragment_ids, updated_fragments, new_fragments, - fields_modified, + bitmap_prune_field_ids, mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap, + bitmap_preserve_exclude_field_ids, update_mode, } } diff --git a/java/src/main/java/com/lancedb/lance/operation/Update.java b/java/src/main/java/com/lancedb/lance/operation/Update.java index d0d7c3871d1..5f2995aad7a 100644 --- a/java/src/main/java/com/lancedb/lance/operation/Update.java +++ b/java/src/main/java/com/lancedb/lance/operation/Update.java @@ -27,22 +27,22 @@ public class Update implements Operation { private final List removedFragmentIds; private final List updatedFragments; private final List newFragments; - private final long[] fieldsModified; - private final long[] fieldsForPreservingFragBitmap; + private final long[] bitmapPruneFieldIds; + private final long[] bitmapPreserveExcludeFieldIds; private final Optional updateMode; private Update( List removedFragmentIds, List updatedFragments, List newFragments, - long[] fieldsModified, - long[] fieldsForPreservingFragBitmap, + long[] bitmapPruneFieldIds, + long[] bitmapPreserveExcludeFieldIds, Optional updateMode) { this.removedFragmentIds = removedFragmentIds; this.updatedFragments = updatedFragments; this.newFragments = newFragments; - this.fieldsModified = fieldsModified; - this.fieldsForPreservingFragBitmap = fieldsForPreservingFragBitmap; + this.bitmapPruneFieldIds = bitmapPruneFieldIds; + this.bitmapPreserveExcludeFieldIds = bitmapPreserveExcludeFieldIds; this.updateMode = updateMode; } @@ -62,12 +62,12 @@ public List newFragments() { return newFragments; } - public long[] fieldsModified() { - return fieldsModified; + public long[] bitmapPruneFieldIds() { + return bitmapPruneFieldIds; } - public long[] fieldsForPreservingFragBitmap() { - return fieldsForPreservingFragBitmap; + public long[] bitmapPreserveExcludeFieldIds() { + return bitmapPreserveExcludeFieldIds; } public Optional updateMode() { @@ -84,8 +84,8 @@ public String toString() { .add("removedFragmentIds", removedFragmentIds) .add("updatedFragments", updatedFragments) .add("newFragments", newFragments) - .add("fieldsModified", fieldsModified) - .add("fieldsForPreservingFragBitmap", fieldsForPreservingFragBitmap) + .add("bitmapPruneFieldIds", bitmapPruneFieldIds) + .add("bitmapPreserveExcludeFieldIds", bitmapPreserveExcludeFieldIds) .add("updateMode", updateMode) .toString(); } @@ -98,8 +98,8 @@ public boolean equals(Object o) { return Objects.equals(removedFragmentIds, that.removedFragmentIds) && Objects.equals(updatedFragments, that.updatedFragments) && Objects.equals(newFragments, that.newFragments) - && Arrays.equals(fieldsModified, that.fieldsModified) - && Arrays.equals(fieldsForPreservingFragBitmap, that.fieldsForPreservingFragBitmap) + && Arrays.equals(bitmapPruneFieldIds, that.bitmapPruneFieldIds) + && Arrays.equals(bitmapPreserveExcludeFieldIds, that.bitmapPreserveExcludeFieldIds) && Objects.equals(updateMode, that.updateMode); } @@ -112,8 +112,8 @@ public static class Builder { private List removedFragmentIds = Collections.emptyList(); private List updatedFragments = Collections.emptyList(); private List newFragments = Collections.emptyList(); - private long[] fieldsModified = new long[0]; - private long[] fieldsForPreservingFragBitmap = new long[0]; + private long[] bitmapPruneFieldIds = new long[0]; + private long[] bitmapPreserveExcludeFieldIds = new long[0]; private Optional updateMode = Optional.empty(); private Builder() {} @@ -133,13 +133,13 @@ public Builder newFragments(List newFragments) { return this; } - public Builder fieldsModified(long[] fieldsModified) { - this.fieldsModified = fieldsModified; + public Builder bitmapPruneFieldIds(long[] bitmapPruneFieldIds) { + this.bitmapPruneFieldIds = bitmapPruneFieldIds; return this; } - public Builder fieldsForPreservingFragBitmap(long[] fieldsForPreservingFragBitmap) { - this.fieldsForPreservingFragBitmap = fieldsForPreservingFragBitmap; + public Builder bitmapPreserveExcludeFieldIds(long[] bitmapPreserveExcludeFieldIds) { + this.bitmapPreserveExcludeFieldIds = bitmapPreserveExcludeFieldIds; return this; } @@ -153,8 +153,8 @@ public Update build() { removedFragmentIds, updatedFragments, newFragments, - fieldsModified, - fieldsForPreservingFragBitmap, + bitmapPruneFieldIds, + bitmapPreserveExcludeFieldIds, updateMode); } } diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 5b6a4064d29..f08a3f5ecab 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -3923,22 +3923,21 @@ class Update(BaseOperation): The fragments that have been updated with new deletion vectors. new_fragments: list[FragmentMetadata] The fragments that contain the new rows. - fields_modified: list[int] - If any fields are modified in updated_fragments, then they must be - listed here so those fragments can be removed from indices covering - those fields. - fields_for_preserving_frag_bitmap: list[int] - The fields that used to judge whether to preserve the new frag's id into - the frag bitmap of the specified indices. + bitmap_prune_field_ids: list[int] + Field IDs that drive index fragment bitmap pruning. If any fields are modified in updated_fragments, + then they must be listed here so those fragments can be removed from indices that + cover any of these fields. + bitmap_preserve_exclude_field_ids: list[int] + Field IDs used to decide whether to preserve new fragment IDs in an index's fragment bitmap. + Indices that do not cover these fields may preserve the new fragment IDs when applicable. """ - removed_fragment_ids: List[int] = dataclasses.field(default_factory=list) updated_fragments: List[FragmentMetadata] = dataclasses.field( default_factory=list ) new_fragments: List[FragmentMetadata] = dataclasses.field(default_factory=list) - fields_modified: List[int] = dataclasses.field(default_factory=list) - fields_for_preserving_frag_bitmap: List[int] = dataclasses.field( + bitmap_prune_field_ids: List[int] = dataclasses.field(default_factory=list) + bitmap_preserve_exclude_field_ids: List[int] = dataclasses.field( default_factory=list ) update_mode: str = "" diff --git a/python/src/transaction.rs b/python/src/transaction.rs index 0aa19669986..aed69e279fe 100644 --- a/python/src/transaction.rs +++ b/python/src/transaction.rs @@ -211,10 +211,10 @@ impl FromPyObject<'_> for PyLance { let new_fragments = extract_vec(&ob.getattr("new_fragments")?)?; - let fields_modified = ob.getattr("fields_modified")?.extract()?; + let bitmap_prune_field_ids = ob.getattr("bitmap_prune_field_ids")?.extract()?; - let fields_for_preserving_frag_bitmap = ob - .getattr("fields_for_preserving_frag_bitmap")? + let bitmap_preserve_exclude_field_ids = ob + .getattr("bitmap_preserve_exclude_field_ids")? .extract() .unwrap_or_default(); @@ -228,9 +228,9 @@ impl FromPyObject<'_> for PyLance { removed_fragment_ids, updated_fragments, new_fragments, - fields_modified, + bitmap_prune_field_ids, mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap, + bitmap_preserve_exclude_field_ids, update_mode, }; Ok(Self(op)) @@ -367,17 +367,17 @@ impl<'py> IntoPyObject<'py> for PyLance<&Operation> { removed_fragment_ids, updated_fragments, new_fragments, - fields_modified, - fields_for_preserving_frag_bitmap, + bitmap_prune_field_ids, + bitmap_preserve_exclude_field_ids, update_mode, .. } => { let removed_fragment_ids = removed_fragment_ids.into_pyobject(py)?; let updated_fragments = export_vec(py, updated_fragments.as_slice())?; let new_fragments = export_vec(py, new_fragments.as_slice())?; - let fields_modified = fields_modified.into_pyobject(py)?; - let fields_for_preserving_frag_bitmap = - fields_for_preserving_frag_bitmap.into_pyobject(py)?; + let bitmap_prune_field_ids = bitmap_prune_field_ids.into_pyobject(py)?; + let bitmap_preserve_exclude_field_ids = + bitmap_preserve_exclude_field_ids.into_pyobject(py)?; let update_mode = match update_mode { Some(mode) => match mode { lance::dataset::transaction::UpdateMode::RewriteRows => "rewrite_rows", @@ -394,8 +394,8 @@ impl<'py> IntoPyObject<'py> for PyLance<&Operation> { removed_fragment_ids, updated_fragments, new_fragments, - fields_modified, - fields_for_preserving_frag_bitmap, + bitmap_prune_field_ids, + bitmap_preserve_exclude_field_ids, update_mode, )) } diff --git a/rust/lance/src/dataset/transaction.rs b/rust/lance/src/dataset/transaction.rs index f22c7426b19..ba0d47896cc 100644 --- a/rust/lance/src/dataset/transaction.rs +++ b/rust/lance/src/dataset/transaction.rs @@ -244,9 +244,9 @@ pub enum Operation { /// /// A horizontal update adds new columns. In this case, the updated fragments /// may have fields removed or added. It is even possible for a field to be tombstoned - /// and then added back in the same update. (which is a field modification). If any - /// fields are modified in this way then they need to be added to the fields_modified list. - /// This way we can correctly update the indices. + /// and then added back in the same update. (which is a field modification). + /// Any fields modified in this way must be added to the bitmap_prune_field_ids list so indices that cover those fields can prune fragment bitmaps accordingly. + /// This ensures indices remain consistent after updates. /// This is what is used by a merge insert that does not match the whole schema. Update { /// Ids of fragments that have been moved @@ -255,13 +255,14 @@ pub enum Operation { updated_fragments: Vec, /// Fragments that have been added new_fragments: Vec, - /// The fields that have been modified - fields_modified: Vec, + /// Field IDs that drive index fragment bitmap pruning + bitmap_prune_field_ids: Vec, /// The MemWAL (pre-image) that should be marked as merged after this transaction mem_wal_to_merge: Option, - /// The fields that used to judge whether to preserve the new frag's id into - /// the frag bitmap of the specified indices. - fields_for_preserving_frag_bitmap: Vec, + /// Field IDs used to decide whether to preserve new fragment IDs in the + /// fragment bitmap of specified indices. Indices that do not cover these + /// fields may preserve the new fragment IDs when applicable. + bitmap_preserve_exclude_field_ids: Vec, /// The mode of update update_mode: Option, }, @@ -312,6 +313,33 @@ pub enum UpdateMode { RewriteColumns, } +impl Operation { + /// Returns the field IDs that drive index fragment bitmap pruning for Update operations. + /// For non-Update operations, returns an empty slice. + pub fn prune_fields(&self) -> &[u32] { + match self { + Operation::Update { + bitmap_prune_field_ids, + .. + } => bitmap_prune_field_ids.as_slice(), + _ => &[], + } + } + + /// Returns the field IDs used to decide whether to preserve new fragment IDs + /// in an index's fragment bitmap for Update operations. For non-Update operations, + /// returns an empty slice. + pub fn bitmap_preserve_exclude_fields(&self) -> &[u32] { + match self { + Operation::Update { + bitmap_preserve_exclude_field_ids, + .. + } => bitmap_preserve_exclude_field_ids.as_slice(), + _ => &[], + } + } +} + impl std::fmt::Display for Operation { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -449,29 +477,29 @@ impl PartialEq for Operation { removed_fragment_ids: a_removed, updated_fragments: a_updated, new_fragments: a_new, - fields_modified: a_fields, + bitmap_prune_field_ids: a_prune_fields, mem_wal_to_merge: a_mem_wal_to_merge, - fields_for_preserving_frag_bitmap: a_fields_for_preserving_frag_bitmap, + bitmap_preserve_exclude_field_ids: a_bitmap_preserve_exclude_fields, update_mode: a_update_mode, }, Self::Update { removed_fragment_ids: b_removed, updated_fragments: b_updated, new_fragments: b_new, - fields_modified: b_fields, + bitmap_prune_field_ids: b_prune_fields, mem_wal_to_merge: b_mem_wal_to_merge, - fields_for_preserving_frag_bitmap: b_fields_for_preserving_frag_bitmap, + bitmap_preserve_exclude_field_ids: b_bitmap_preserve_exclude_fields, update_mode: b_update_mode, }, ) => { compare_vec(a_removed, b_removed) && compare_vec(a_updated, b_updated) && compare_vec(a_new, b_new) - && compare_vec(a_fields, b_fields) + && compare_vec(a_prune_fields, b_prune_fields) && a_mem_wal_to_merge == b_mem_wal_to_merge && compare_vec( - a_fields_for_preserving_frag_bitmap, - b_fields_for_preserving_frag_bitmap, + a_bitmap_preserve_exclude_fields, + b_bitmap_preserve_exclude_fields, ) && a_update_mode == b_update_mode } @@ -1722,9 +1750,9 @@ impl Transaction { removed_fragment_ids, updated_fragments, new_fragments, - fields_modified, + bitmap_prune_field_ids, mem_wal_to_merge, - fields_for_preserving_frag_bitmap, + bitmap_preserve_exclude_field_ids, update_mode, } => { // Extract existing fragments once for reuse @@ -1759,7 +1787,7 @@ impl Transaction { Self::prune_updated_fields_from_indices( &mut final_indices, updated_fragments, - fields_modified, + bitmap_prune_field_ids, ); let mut new_fragments = @@ -1918,7 +1946,7 @@ impl Transaction { &mut final_indices, &pure_updated_frag_ids, &original_fragment_ids, - fields_for_preserving_frag_bitmap, + bitmap_preserve_exclude_field_ids, ); } @@ -2323,13 +2351,13 @@ impl Transaction { indices: &mut [IndexMetadata], pure_update_frag_ids: &[u64], original_fragment_ids: &[u64], - fields_for_preserving_frag_bitmap: &[u32], + bitmap_preserve_exclude_field_ids: &[u32], ) { if pure_update_frag_ids.is_empty() { return; } - let value_updated_field_set = fields_for_preserving_frag_bitmap + let value_updated_field_set = bitmap_preserve_exclude_field_ids .iter() .collect::>(); @@ -2363,15 +2391,15 @@ impl Transaction { fn prune_updated_fields_from_indices( indices: &mut [IndexMetadata], updated_fragments: &[Fragment], - fields_modified: &[u32], + bitmap_prune_field_ids: &[u32], ) { - if fields_modified.is_empty() { + if bitmap_prune_field_ids.is_empty() { return; } // If we modified any fields in the fragments then we need to remove those fragments // from the index if the index covers one of those modified fields. - let fields_modified_set = fields_modified.iter().collect::>(); + let fields_modified_set = bitmap_prune_field_ids.iter().collect::>(); for index in indices.iter_mut() { if index .fields @@ -2911,9 +2939,9 @@ impl TryFrom for Transaction { .into_iter() .map(Fragment::try_from) .collect::>>()?, - fields_modified, + bitmap_prune_field_ids: fields_modified, mem_wal_to_merge: mem_wal_to_merge.map(|m| MemWal::try_from(m).unwrap()), - fields_for_preserving_frag_bitmap, + bitmap_preserve_exclude_field_ids: fields_for_preserving_frag_bitmap, update_mode: match update_mode { 0 => Some(UpdateMode::RewriteRows), 1 => Some(UpdateMode::RewriteColumns), @@ -3266,9 +3294,9 @@ impl From<&Transaction> for pb::Transaction { removed_fragment_ids, updated_fragments, new_fragments, - fields_modified, + bitmap_prune_field_ids, mem_wal_to_merge, - fields_for_preserving_frag_bitmap, + bitmap_preserve_exclude_field_ids, update_mode, } => pb::transaction::Operation::Update(pb::transaction::Update { removed_fragment_ids: removed_fragment_ids.clone(), @@ -3277,9 +3305,9 @@ impl From<&Transaction> for pb::Transaction { .map(pb::DataFragment::from) .collect(), new_fragments: new_fragments.iter().map(pb::DataFragment::from).collect(), - fields_modified: fields_modified.clone(), + fields_modified: bitmap_prune_field_ids.clone(), mem_wal_to_merge: mem_wal_to_merge.as_ref().map(|m| m.into()), - fields_for_preserving_frag_bitmap: fields_for_preserving_frag_bitmap.clone(), + fields_for_preserving_frag_bitmap: bitmap_preserve_exclude_field_ids.clone(), update_mode: update_mode .as_ref() .map(|mode| match mode { diff --git a/rust/lance/src/dataset/write/commit.rs b/rust/lance/src/dataset/write/commit.rs index 15c1e0e8e31..913d833a497 100644 --- a/rust/lance/src/dataset/write/commit.rs +++ b/rust/lance/src/dataset/write/commit.rs @@ -788,9 +788,9 @@ mod tests { updated_fragments: vec![], new_fragments: vec![], removed_fragment_ids: vec![], - fields_modified: vec![], + bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap: vec![], + bitmap_preserve_exclude_field_ids: vec![], update_mode: None, }, read_version: 1, diff --git a/rust/lance/src/dataset/write/merge_insert.rs b/rust/lance/src/dataset/write/merge_insert.rs index 340e60cdff9..f58b5a36112 100644 --- a/rust/lance/src/dataset/write/merge_insert.rs +++ b/rust/lance/src/dataset/write/merge_insert.rs @@ -1467,7 +1467,7 @@ impl MergeInsertJob { // We will have a different commit path here too, as we are modifying // fragments rather than writing new ones - let (updated_fragments, new_fragments, fields_modified) = Self::update_fragments( + let (updated_fragments, new_fragments, bitmap_prune_field_ids) = Self::update_fragments( self.dataset.clone(), Box::pin(stream), self.dataset.manifest.version + 1, @@ -1478,9 +1478,9 @@ impl MergeInsertJob { removed_fragment_ids: Vec::new(), updated_fragments, new_fragments, - fields_modified, + bitmap_prune_field_ids, mem_wal_to_merge: self.params.mem_wal_to_merge, - fields_for_preserving_frag_bitmap: vec![], // in-place update do not affect preserving frag bitmap + bitmap_preserve_exclude_field_ids: vec![], // in-place update does not affect preserving fragment bitmap update_mode: Some(RewriteColumns), }; // We have rewritten the fragments, not just the deletion files, so @@ -1551,9 +1551,9 @@ impl MergeInsertJob { new_fragments, // On this path we only make deletions against updated_fragments and will not // modify any field values. - fields_modified: vec![], + bitmap_prune_field_ids: vec![], mem_wal_to_merge: self.params.mem_wal_to_merge, - fields_for_preserving_frag_bitmap: full_schema + bitmap_preserve_exclude_field_ids: full_schema .fields .iter() .map(|f| f.id as u32) diff --git a/rust/lance/src/dataset/write/merge_insert/exec/write.rs b/rust/lance/src/dataset/write/merge_insert/exec/write.rs index 0df589ce71a..5621f38cbb1 100644 --- a/rust/lance/src/dataset/write/merge_insert/exec/write.rs +++ b/rust/lance/src/dataset/write/merge_insert/exec/write.rs @@ -882,9 +882,9 @@ impl ExecutionPlan for FullSchemaMergeInsertExec { removed_fragment_ids, updated_fragments, new_fragments, - fields_modified: vec![], // No fields are modified in schema for upsert + bitmap_prune_field_ids: vec![], // No schema fields are modified for upsert mem_wal_to_merge, - fields_for_preserving_frag_bitmap: dataset + bitmap_preserve_exclude_field_ids: dataset .schema() .fields .iter() diff --git a/rust/lance/src/dataset/write/update.rs b/rust/lance/src/dataset/write/update.rs index 5d40570ac91..e4c8bb4ec7a 100644 --- a/rust/lance/src/dataset/write/update.rs +++ b/rust/lance/src/dataset/write/update.rs @@ -393,10 +393,10 @@ impl UpdateJob { dataset: Arc, update_data: UpdateData, ) -> Result { - let mut fields_for_preserving_frag_bitmap = Vec::new(); + let mut bitmap_preserve_exclude_field_ids = Vec::new(); for column_name in self.updates.keys() { if let Ok(field_id) = dataset.schema().field_id(column_name) { - fields_for_preserving_frag_bitmap.push(field_id as u32); + bitmap_preserve_exclude_field_ids.push(field_id as u32); } } @@ -405,12 +405,11 @@ impl UpdateJob { removed_fragment_ids: update_data.removed_fragment_ids, updated_fragments: update_data.old_fragments, new_fragments: update_data.new_fragments, - // In "rewrite rows" mode, the rows that are updated in the fragment - // are moved(deleted and appended). - // so we do not need to handle the frag bitmap of the index about it. - fields_modified: vec![], + // In "rewrite rows" mode, rows updated in the fragment are moved (deleted and appended). + // Therefore we do not need to prune index fragment bitmaps based on updated values here. + bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap, + bitmap_preserve_exclude_field_ids, update_mode: Some(RewriteRows), }; diff --git a/rust/lance/src/io/commit/conflict_resolver.rs b/rust/lance/src/io/commit/conflict_resolver.rs index 84110c69249..12e8cb3e0e9 100644 --- a/rust/lance/src/io/commit/conflict_resolver.rs +++ b/rust/lance/src/io/commit/conflict_resolver.rs @@ -1757,9 +1757,9 @@ mod tests { updated_fragments: vec![Fragment::new(0)], removed_fragment_ids: vec![], new_fragments: vec![], - fields_modified: vec![], + bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap: vec![], + bitmap_preserve_exclude_field_ids: vec![], update_mode: None, }; let transaction = Transaction::new_from_version(1, operation); @@ -1768,9 +1768,9 @@ mod tests { updated_fragments: vec![Fragment::new(1)], removed_fragment_ids: vec![2], new_fragments: vec![], - fields_modified: vec![], + bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap: vec![], + bitmap_preserve_exclude_field_ids: vec![], update_mode: None, }, Operation::Delete { @@ -1782,9 +1782,9 @@ mod tests { removed_fragment_ids: vec![], updated_fragments: vec![Fragment::new(4)], new_fragments: vec![], - fields_modified: vec![], + bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap: vec![], + bitmap_preserve_exclude_field_ids: vec![], update_mode: None, }, ]; @@ -1883,9 +1883,9 @@ mod tests { updated_fragments: vec![apply_deletion(&[0], &mut fragment, &dataset).await], removed_fragment_ids: vec![], new_fragments: vec![sample_file.clone()], - fields_modified: vec![], + bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap: vec![], + bitmap_preserve_exclude_field_ids: vec![], update_mode: None, }, Operation::Delete { @@ -1897,9 +1897,9 @@ mod tests { updated_fragments: vec![apply_deletion(&[2], &mut fragment, &dataset).await], removed_fragment_ids: vec![], new_fragments: vec![sample_file], - fields_modified: vec![], + bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap: vec![], + bitmap_preserve_exclude_field_ids: vec![], update_mode: None, }, ]; @@ -2018,9 +2018,9 @@ mod tests { updated_fragments: vec![], removed_fragment_ids: vec![0], new_fragments: vec![sample_file.clone()], - fields_modified: vec![], + bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap: vec![], + bitmap_preserve_exclude_field_ids: vec![], update_mode: None, }, ), @@ -2030,9 +2030,9 @@ mod tests { updated_fragments: vec![apply_deletion(&[0], &mut fragment, &dataset).await], removed_fragment_ids: vec![], new_fragments: vec![sample_file.clone()], - fields_modified: vec![], + bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap: vec![], + bitmap_preserve_exclude_field_ids: vec![], update_mode: None, }, ), @@ -2188,9 +2188,9 @@ mod tests { removed_fragment_ids: vec![1], updated_fragments: vec![fragment0.clone()], new_fragments: vec![fragment2.clone()], - fields_modified: vec![0], + bitmap_prune_field_ids: vec![0], mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap: vec![], + bitmap_preserve_exclude_field_ids: vec![], update_mode: None, }, create_update_config_for_test( @@ -2383,9 +2383,9 @@ mod tests { updated_fragments: vec![fragment0], removed_fragment_ids: vec![], new_fragments: vec![fragment2], - fields_modified: vec![0], + bitmap_prune_field_ids: vec![0], mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap: vec![], + bitmap_preserve_exclude_field_ids: vec![], update_mode: None, }, [ From e2ca03f27fc46b0d287e94acf7b6678d411f2574 Mon Sep 17 00:00:00 2001 From: yanghua Date: Sun, 28 Sep 2025 23:03:56 +0800 Subject: [PATCH 02/17] fix format issue --- python/python/lance/dataset.py | 13 ++++++++----- rust/lance/src/dataset/transaction.rs | 8 ++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index f08a3f5ecab..fcda34874eb 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -3924,12 +3924,15 @@ class Update(BaseOperation): new_fragments: list[FragmentMetadata] The fragments that contain the new rows. bitmap_prune_field_ids: list[int] - Field IDs that drive index fragment bitmap pruning. If any fields are modified in updated_fragments, - then they must be listed here so those fragments can be removed from indices that - cover any of these fields. + Field IDs that drive index fragment bitmap pruning. If any fields are + modified in updated_fragments, + then they must be listed here so those fragments can be removed from + indices that cover any of these fields. bitmap_preserve_exclude_field_ids: list[int] - Field IDs used to decide whether to preserve new fragment IDs in an index's fragment bitmap. - Indices that do not cover these fields may preserve the new fragment IDs when applicable. + Field IDs used to decide whether to preserve new fragment IDs in an index's + fragment bitmap. + Indices that do not cover these fields may preserve the new fragment IDs + when applicable. """ removed_fragment_ids: List[int] = dataclasses.field(default_factory=list) updated_fragments: List[FragmentMetadata] = dataclasses.field( diff --git a/rust/lance/src/dataset/transaction.rs b/rust/lance/src/dataset/transaction.rs index ba0d47896cc..a6811cc2aa8 100644 --- a/rust/lance/src/dataset/transaction.rs +++ b/rust/lance/src/dataset/transaction.rs @@ -245,8 +245,8 @@ pub enum Operation { /// A horizontal update adds new columns. In this case, the updated fragments /// may have fields removed or added. It is even possible for a field to be tombstoned /// and then added back in the same update. (which is a field modification). - /// Any fields modified in this way must be added to the bitmap_prune_field_ids list so indices that cover those fields can prune fragment bitmaps accordingly. - /// This ensures indices remain consistent after updates. + /// Any fields modified in this way must be added to the bitmap_prune_field_ids list so indices that cover those fields can prune fragment bitmaps accordingly. + /// This ensures indices remain consistent after updates. /// This is what is used by a merge insert that does not match the whole schema. Update { /// Ids of fragments that have been moved @@ -318,7 +318,7 @@ impl Operation { /// For non-Update operations, returns an empty slice. pub fn prune_fields(&self) -> &[u32] { match self { - Operation::Update { + Self::Update { bitmap_prune_field_ids, .. } => bitmap_prune_field_ids.as_slice(), @@ -331,7 +331,7 @@ impl Operation { /// returns an empty slice. pub fn bitmap_preserve_exclude_fields(&self) -> &[u32] { match self { - Operation::Update { + Self::Update { bitmap_preserve_exclude_field_ids, .. } => bitmap_preserve_exclude_field_ids.as_slice(), From 0179a5a001ff9d69f1a5e3ef0ae083aeb0db6f35 Mon Sep 17 00:00:00 2001 From: yanghua Date: Mon, 29 Sep 2025 21:17:30 +0800 Subject: [PATCH 03/17] refactor code --- rust/lance/src/dataset/transaction.rs | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/rust/lance/src/dataset/transaction.rs b/rust/lance/src/dataset/transaction.rs index a6811cc2aa8..1480e6f1e3b 100644 --- a/rust/lance/src/dataset/transaction.rs +++ b/rust/lance/src/dataset/transaction.rs @@ -313,33 +313,6 @@ pub enum UpdateMode { RewriteColumns, } -impl Operation { - /// Returns the field IDs that drive index fragment bitmap pruning for Update operations. - /// For non-Update operations, returns an empty slice. - pub fn prune_fields(&self) -> &[u32] { - match self { - Self::Update { - bitmap_prune_field_ids, - .. - } => bitmap_prune_field_ids.as_slice(), - _ => &[], - } - } - - /// Returns the field IDs used to decide whether to preserve new fragment IDs - /// in an index's fragment bitmap for Update operations. For non-Update operations, - /// returns an empty slice. - pub fn bitmap_preserve_exclude_fields(&self) -> &[u32] { - match self { - Self::Update { - bitmap_preserve_exclude_field_ids, - .. - } => bitmap_preserve_exclude_field_ids.as_slice(), - _ => &[], - } - } -} - impl std::fmt::Display for Operation { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { From 24e291d8f883051c44449ccfdaab5f2b8c9e78f5 Mon Sep 17 00:00:00 2001 From: yanghua Date: Tue, 30 Sep 2025 07:18:52 +0800 Subject: [PATCH 04/17] refactor code --- protos/transaction.proto | 11 ++++++----- rust/lance/src/dataset/transaction.rs | 12 ++++++------ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/protos/transaction.proto b/protos/transaction.proto index 186847d52b5..7619b0669f3 100644 --- a/protos/transaction.proto +++ b/protos/transaction.proto @@ -193,13 +193,14 @@ message Transaction { repeated DataFragment updated_fragments = 2; // The new fragments where updated rows have been moved to. repeated DataFragment new_fragments = 3; - // The ids of the fields that have been modified. - repeated uint32 fields_modified = 4; + /// Field IDs that drive index fragment bitmap pruning + repeated uint32 bitmap_prune_field_ids = 4; /// The MemWAL (pre-image) that should be marked as merged after this transaction MemWalIndexDetails.MemWal mem_wal_to_merge = 5; - /// The fields that used to judge whether to preserve the new frag's id into - /// the frag bitmap of the specified indices. - repeated uint32 fields_for_preserving_frag_bitmap = 6; + /// Field IDs used to decide whether to preserve new fragment IDs in the + /// fragment bitmap of specified indices. Indices that do not cover these + /// fields may preserve the new fragment IDs when applicable. + repeated uint32 bitmap_preserve_exclude_field_ids = 6; // The mode of update UpdateMode update_mode = 7; } diff --git a/rust/lance/src/dataset/transaction.rs b/rust/lance/src/dataset/transaction.rs index 1480e6f1e3b..6fb20a17057 100644 --- a/rust/lance/src/dataset/transaction.rs +++ b/rust/lance/src/dataset/transaction.rs @@ -2898,9 +2898,9 @@ impl TryFrom for Transaction { removed_fragment_ids, updated_fragments, new_fragments, - fields_modified, + bitmap_prune_field_ids, mem_wal_to_merge, - fields_for_preserving_frag_bitmap, + bitmap_preserve_exclude_field_ids, update_mode, })) => Operation::Update { removed_fragment_ids, @@ -2912,9 +2912,9 @@ impl TryFrom for Transaction { .into_iter() .map(Fragment::try_from) .collect::>>()?, - bitmap_prune_field_ids: fields_modified, + bitmap_prune_field_ids, mem_wal_to_merge: mem_wal_to_merge.map(|m| MemWal::try_from(m).unwrap()), - bitmap_preserve_exclude_field_ids: fields_for_preserving_frag_bitmap, + bitmap_preserve_exclude_field_ids, update_mode: match update_mode { 0 => Some(UpdateMode::RewriteRows), 1 => Some(UpdateMode::RewriteColumns), @@ -3278,9 +3278,9 @@ impl From<&Transaction> for pb::Transaction { .map(pb::DataFragment::from) .collect(), new_fragments: new_fragments.iter().map(pb::DataFragment::from).collect(), - fields_modified: bitmap_prune_field_ids.clone(), + bitmap_prune_field_ids: bitmap_prune_field_ids.clone(), mem_wal_to_merge: mem_wal_to_merge.as_ref().map(|m| m.into()), - fields_for_preserving_frag_bitmap: bitmap_preserve_exclude_field_ids.clone(), + bitmap_preserve_exclude_field_ids: bitmap_preserve_exclude_field_ids.clone(), update_mode: update_mode .as_ref() .map(|mode| match mode { From 5ab71a7e6d1597efc6aca18e419d040b4b5ae15c Mon Sep 17 00:00:00 2001 From: yanghua Date: Tue, 30 Sep 2025 09:48:45 +0800 Subject: [PATCH 05/17] refactor code --- java/lance-jni/src/transaction.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/java/lance-jni/src/transaction.rs b/java/lance-jni/src/transaction.rs index 731b2a7ced3..0716d2d73db 100644 --- a/java/lance-jni/src/transaction.rs +++ b/java/lance-jni/src/transaction.rs @@ -942,7 +942,8 @@ fn convert_to_rust_operation( let bitmap_prune_field_ids = env .call_method(java_operation, "bitmapPruneFieldIds", "()[J", &[])? .l()?; - let bitmap_prune_field_ids = JLongArray::from(bitmap_prune_field_ids).extract_object(env)?; + let bitmap_prune_field_ids = + JLongArray::from(bitmap_prune_field_ids).extract_object(env)?; let bitmap_preserve_exclude_field_ids = env .call_method(java_operation, "bitmapPreserveExcludeFieldIds", "()[J", &[])? From bbf11dbb0381ec9f4b32fe5bff516a65cdb67385 Mon Sep 17 00:00:00 2001 From: yanghua Date: Tue, 30 Sep 2025 11:44:07 +0800 Subject: [PATCH 06/17] rename to bitmap_preserve_field_ids --- java/lance-jni/src/transaction.rs | 16 +++++------ protos/transaction.proto | 2 +- python/python/lance/dataset.py | 4 +-- python/src/transaction.rs | 14 +++++----- rust/lance/src/dataset/transaction.rs | 28 +++++++++---------- rust/lance/src/dataset/write/commit.rs | 2 +- rust/lance/src/dataset/write/merge_insert.rs | 8 ++---- .../dataset/write/merge_insert/exec/write.rs | 2 +- rust/lance/src/dataset/write/update.rs | 6 ++-- rust/lance/src/io/commit/conflict_resolver.rs | 18 ++++++------ 10 files changed, 47 insertions(+), 53 deletions(-) diff --git a/java/lance-jni/src/transaction.rs b/java/lance-jni/src/transaction.rs index 0716d2d73db..32b81f7205f 100644 --- a/java/lance-jni/src/transaction.rs +++ b/java/lance-jni/src/transaction.rs @@ -497,7 +497,7 @@ fn convert_to_java_operation_inner<'local>( new_fragments, bitmap_prune_field_ids, mem_wal_to_merge: _, - bitmap_preserve_exclude_field_ids, + bitmap_preserve_field_ids, update_mode, } => { let removed_ids: Vec> = removed_fragment_ids @@ -508,8 +508,8 @@ fn convert_to_java_operation_inner<'local>( let updated_fragments_obj = export_vec(env, &updated_fragments)?; let new_fragments_obj = export_vec(env, &new_fragments)?; let bitmap_prune_field_ids = JLance(bitmap_prune_field_ids.clone()).into_java(env)?; - let bitmap_preserve_exclude_field_ids = - JLance(bitmap_preserve_exclude_field_ids.clone()).into_java(env)?; + let bitmap_preserve_field_ids = + JLance(bitmap_preserve_field_ids.clone()).into_java(env)?; let update_mode = match update_mode { Some(update_mode) => update_mode.into_java(env), None => Ok(JObject::null()), @@ -530,7 +530,7 @@ fn convert_to_java_operation_inner<'local>( JValue::Object(&updated_fragments_obj), JValue::Object(&new_fragments_obj), JValueGen::Object(&bitmap_prune_field_ids), - JValueGen::Object(&bitmap_preserve_exclude_field_ids), + JValueGen::Object(&bitmap_preserve_field_ids), JValue::Object(&update_mode_optional), ], )?) @@ -945,11 +945,11 @@ fn convert_to_rust_operation( let bitmap_prune_field_ids = JLongArray::from(bitmap_prune_field_ids).extract_object(env)?; - let bitmap_preserve_exclude_field_ids = env + let bitmap_preserve_field_ids = env .call_method(java_operation, "bitmapPreserveExcludeFieldIds", "()[J", &[])? .l()?; - let bitmap_preserve_exclude_field_ids = - JLongArray::from(bitmap_preserve_exclude_field_ids).extract_object(env)?; + let bitmap_preserve_field_ids = + JLongArray::from(bitmap_preserve_field_ids).extract_object(env)?; let update_mode: Option = env.get_optional_from_method(java_operation, "updateMode", |env, update_mode| { @@ -962,7 +962,7 @@ fn convert_to_rust_operation( new_fragments, bitmap_prune_field_ids, mem_wal_to_merge: None, - bitmap_preserve_exclude_field_ids, + bitmap_preserve_field_ids, update_mode, } } diff --git a/protos/transaction.proto b/protos/transaction.proto index 7619b0669f3..c912f5fb047 100644 --- a/protos/transaction.proto +++ b/protos/transaction.proto @@ -200,7 +200,7 @@ message Transaction { /// Field IDs used to decide whether to preserve new fragment IDs in the /// fragment bitmap of specified indices. Indices that do not cover these /// fields may preserve the new fragment IDs when applicable. - repeated uint32 bitmap_preserve_exclude_field_ids = 6; + repeated uint32 bitmap_preserve_field_ids = 6; // The mode of update UpdateMode update_mode = 7; } diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index fcda34874eb..3736cdc2366 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -3928,7 +3928,7 @@ class Update(BaseOperation): modified in updated_fragments, then they must be listed here so those fragments can be removed from indices that cover any of these fields. - bitmap_preserve_exclude_field_ids: list[int] + bitmap_preserve_field_ids: list[int] Field IDs used to decide whether to preserve new fragment IDs in an index's fragment bitmap. Indices that do not cover these fields may preserve the new fragment IDs @@ -3985,7 +3985,7 @@ class Merge(BaseOperation): 0 1 a 1 2 b 2 3 c - 3 4 d + 3 4 dpython/python/lance/dataset.py >>> def double_a(batch: pa.RecordBatch) -> pa.RecordBatch: ... doubled = pc.multiply(batch["a"], 2) ... return pa.record_batch([doubled], ["a_doubled"]) diff --git a/python/src/transaction.rs b/python/src/transaction.rs index aed69e279fe..c029863a4ae 100644 --- a/python/src/transaction.rs +++ b/python/src/transaction.rs @@ -213,8 +213,8 @@ impl FromPyObject<'_> for PyLance { let bitmap_prune_field_ids = ob.getattr("bitmap_prune_field_ids")?.extract()?; - let bitmap_preserve_exclude_field_ids = ob - .getattr("bitmap_preserve_exclude_field_ids")? + let bitmap_preserve_field_ids = ob + .getattr("bitmap_preserve_field_ids")? .extract() .unwrap_or_default(); @@ -230,7 +230,7 @@ impl FromPyObject<'_> for PyLance { new_fragments, bitmap_prune_field_ids, mem_wal_to_merge: None, - bitmap_preserve_exclude_field_ids, + bitmap_preserve_field_ids, update_mode, }; Ok(Self(op)) @@ -368,7 +368,7 @@ impl<'py> IntoPyObject<'py> for PyLance<&Operation> { updated_fragments, new_fragments, bitmap_prune_field_ids, - bitmap_preserve_exclude_field_ids, + bitmap_preserve_field_ids, update_mode, .. } => { @@ -376,8 +376,8 @@ impl<'py> IntoPyObject<'py> for PyLance<&Operation> { let updated_fragments = export_vec(py, updated_fragments.as_slice())?; let new_fragments = export_vec(py, new_fragments.as_slice())?; let bitmap_prune_field_ids = bitmap_prune_field_ids.into_pyobject(py)?; - let bitmap_preserve_exclude_field_ids = - bitmap_preserve_exclude_field_ids.into_pyobject(py)?; + let bitmap_preserve_field_ids = + bitmap_preserve_field_ids.into_pyobject(py)?; let update_mode = match update_mode { Some(mode) => match mode { lance::dataset::transaction::UpdateMode::RewriteRows => "rewrite_rows", @@ -395,7 +395,7 @@ impl<'py> IntoPyObject<'py> for PyLance<&Operation> { updated_fragments, new_fragments, bitmap_prune_field_ids, - bitmap_preserve_exclude_field_ids, + bitmap_preserve_field_ids, update_mode, )) } diff --git a/rust/lance/src/dataset/transaction.rs b/rust/lance/src/dataset/transaction.rs index 6fb20a17057..edf99b95620 100644 --- a/rust/lance/src/dataset/transaction.rs +++ b/rust/lance/src/dataset/transaction.rs @@ -262,7 +262,7 @@ pub enum Operation { /// Field IDs used to decide whether to preserve new fragment IDs in the /// fragment bitmap of specified indices. Indices that do not cover these /// fields may preserve the new fragment IDs when applicable. - bitmap_preserve_exclude_field_ids: Vec, + bitmap_preserve_field_ids: Vec, /// The mode of update update_mode: Option, }, @@ -452,7 +452,7 @@ impl PartialEq for Operation { new_fragments: a_new, bitmap_prune_field_ids: a_prune_fields, mem_wal_to_merge: a_mem_wal_to_merge, - bitmap_preserve_exclude_field_ids: a_bitmap_preserve_exclude_fields, + bitmap_preserve_field_ids: a_bitmap_preserve_exclude_fields, update_mode: a_update_mode, }, Self::Update { @@ -461,7 +461,7 @@ impl PartialEq for Operation { new_fragments: b_new, bitmap_prune_field_ids: b_prune_fields, mem_wal_to_merge: b_mem_wal_to_merge, - bitmap_preserve_exclude_field_ids: b_bitmap_preserve_exclude_fields, + bitmap_preserve_field_ids: b_bitmap_preserve_exclude_fields, update_mode: b_update_mode, }, ) => { @@ -1725,7 +1725,7 @@ impl Transaction { new_fragments, bitmap_prune_field_ids, mem_wal_to_merge, - bitmap_preserve_exclude_field_ids, + bitmap_preserve_field_ids, update_mode, } => { // Extract existing fragments once for reuse @@ -1919,7 +1919,7 @@ impl Transaction { &mut final_indices, &pure_updated_frag_ids, &original_fragment_ids, - bitmap_preserve_exclude_field_ids, + bitmap_preserve_field_ids, ); } @@ -2324,15 +2324,13 @@ impl Transaction { indices: &mut [IndexMetadata], pure_update_frag_ids: &[u64], original_fragment_ids: &[u64], - bitmap_preserve_exclude_field_ids: &[u32], + bitmap_preserve_field_ids: &[u32], ) { if pure_update_frag_ids.is_empty() { return; } - let value_updated_field_set = bitmap_preserve_exclude_field_ids - .iter() - .collect::>(); + let value_updated_field_set = bitmap_preserve_field_ids.iter().collect::>(); for index in indices.iter_mut() { let index_covers_modified_field = index.fields.iter().any(|field_id| { @@ -2372,12 +2370,12 @@ impl Transaction { // If we modified any fields in the fragments then we need to remove those fragments // from the index if the index covers one of those modified fields. - let fields_modified_set = bitmap_prune_field_ids.iter().collect::>(); + let fields_set_for_pruning = bitmap_prune_field_ids.iter().collect::>(); for index in indices.iter_mut() { if index .fields .iter() - .any(|field_id| fields_modified_set.contains(&u32::try_from(*field_id).unwrap())) + .any(|field_id| fields_set_for_pruning.contains(&u32::try_from(*field_id).unwrap())) { if let Some(fragment_bitmap) = &mut index.fragment_bitmap { for fragment_id in updated_fragments.iter().map(|f| f.id as u32) { @@ -2900,7 +2898,7 @@ impl TryFrom for Transaction { new_fragments, bitmap_prune_field_ids, mem_wal_to_merge, - bitmap_preserve_exclude_field_ids, + bitmap_preserve_field_ids, update_mode, })) => Operation::Update { removed_fragment_ids, @@ -2914,7 +2912,7 @@ impl TryFrom for Transaction { .collect::>>()?, bitmap_prune_field_ids, mem_wal_to_merge: mem_wal_to_merge.map(|m| MemWal::try_from(m).unwrap()), - bitmap_preserve_exclude_field_ids, + bitmap_preserve_field_ids, update_mode: match update_mode { 0 => Some(UpdateMode::RewriteRows), 1 => Some(UpdateMode::RewriteColumns), @@ -3269,7 +3267,7 @@ impl From<&Transaction> for pb::Transaction { new_fragments, bitmap_prune_field_ids, mem_wal_to_merge, - bitmap_preserve_exclude_field_ids, + bitmap_preserve_field_ids, update_mode, } => pb::transaction::Operation::Update(pb::transaction::Update { removed_fragment_ids: removed_fragment_ids.clone(), @@ -3280,7 +3278,7 @@ impl From<&Transaction> for pb::Transaction { new_fragments: new_fragments.iter().map(pb::DataFragment::from).collect(), bitmap_prune_field_ids: bitmap_prune_field_ids.clone(), mem_wal_to_merge: mem_wal_to_merge.as_ref().map(|m| m.into()), - bitmap_preserve_exclude_field_ids: bitmap_preserve_exclude_field_ids.clone(), + bitmap_preserve_field_ids: bitmap_preserve_field_ids.clone(), update_mode: update_mode .as_ref() .map(|mode| match mode { diff --git a/rust/lance/src/dataset/write/commit.rs b/rust/lance/src/dataset/write/commit.rs index 913d833a497..48df3283544 100644 --- a/rust/lance/src/dataset/write/commit.rs +++ b/rust/lance/src/dataset/write/commit.rs @@ -790,7 +790,7 @@ mod tests { removed_fragment_ids: vec![], bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - bitmap_preserve_exclude_field_ids: vec![], + bitmap_preserve_field_ids: vec![], update_mode: None, }, read_version: 1, diff --git a/rust/lance/src/dataset/write/merge_insert.rs b/rust/lance/src/dataset/write/merge_insert.rs index f58b5a36112..f40c3457362 100644 --- a/rust/lance/src/dataset/write/merge_insert.rs +++ b/rust/lance/src/dataset/write/merge_insert.rs @@ -1480,7 +1480,7 @@ impl MergeInsertJob { new_fragments, bitmap_prune_field_ids, mem_wal_to_merge: self.params.mem_wal_to_merge, - bitmap_preserve_exclude_field_ids: vec![], // in-place update does not affect preserving fragment bitmap + bitmap_preserve_field_ids: vec![], // in-place update does not affect preserving fragment bitmap update_mode: Some(RewriteColumns), }; // We have rewritten the fragments, not just the deletion files, so @@ -1553,11 +1553,7 @@ impl MergeInsertJob { // modify any field values. bitmap_prune_field_ids: vec![], mem_wal_to_merge: self.params.mem_wal_to_merge, - bitmap_preserve_exclude_field_ids: full_schema - .fields - .iter() - .map(|f| f.id as u32) - .collect(), + bitmap_preserve_field_ids: full_schema.fields.iter().map(|f| f.id as u32).collect(), update_mode: Some(RewriteRows), }; diff --git a/rust/lance/src/dataset/write/merge_insert/exec/write.rs b/rust/lance/src/dataset/write/merge_insert/exec/write.rs index 5621f38cbb1..338fb829ed3 100644 --- a/rust/lance/src/dataset/write/merge_insert/exec/write.rs +++ b/rust/lance/src/dataset/write/merge_insert/exec/write.rs @@ -884,7 +884,7 @@ impl ExecutionPlan for FullSchemaMergeInsertExec { new_fragments, bitmap_prune_field_ids: vec![], // No schema fields are modified for upsert mem_wal_to_merge, - bitmap_preserve_exclude_field_ids: dataset + bitmap_preserve_field_ids: dataset .schema() .fields .iter() diff --git a/rust/lance/src/dataset/write/update.rs b/rust/lance/src/dataset/write/update.rs index e4c8bb4ec7a..4d430266944 100644 --- a/rust/lance/src/dataset/write/update.rs +++ b/rust/lance/src/dataset/write/update.rs @@ -393,10 +393,10 @@ impl UpdateJob { dataset: Arc, update_data: UpdateData, ) -> Result { - let mut bitmap_preserve_exclude_field_ids = Vec::new(); + let mut bitmap_preserve_field_ids = Vec::new(); for column_name in self.updates.keys() { if let Ok(field_id) = dataset.schema().field_id(column_name) { - bitmap_preserve_exclude_field_ids.push(field_id as u32); + bitmap_preserve_field_ids.push(field_id as u32); } } @@ -409,7 +409,7 @@ impl UpdateJob { // Therefore we do not need to prune index fragment bitmaps based on updated values here. bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - bitmap_preserve_exclude_field_ids, + bitmap_preserve_field_ids, update_mode: Some(RewriteRows), }; diff --git a/rust/lance/src/io/commit/conflict_resolver.rs b/rust/lance/src/io/commit/conflict_resolver.rs index 12e8cb3e0e9..ff6846b58ab 100644 --- a/rust/lance/src/io/commit/conflict_resolver.rs +++ b/rust/lance/src/io/commit/conflict_resolver.rs @@ -1759,7 +1759,7 @@ mod tests { new_fragments: vec![], bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - bitmap_preserve_exclude_field_ids: vec![], + bitmap_preserve_field_ids: vec![], update_mode: None, }; let transaction = Transaction::new_from_version(1, operation); @@ -1770,7 +1770,7 @@ mod tests { new_fragments: vec![], bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - bitmap_preserve_exclude_field_ids: vec![], + bitmap_preserve_field_ids: vec![], update_mode: None, }, Operation::Delete { @@ -1784,7 +1784,7 @@ mod tests { new_fragments: vec![], bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - bitmap_preserve_exclude_field_ids: vec![], + bitmap_preserve_field_ids: vec![], update_mode: None, }, ]; @@ -1885,7 +1885,7 @@ mod tests { new_fragments: vec![sample_file.clone()], bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - bitmap_preserve_exclude_field_ids: vec![], + bitmap_preserve_field_ids: vec![], update_mode: None, }, Operation::Delete { @@ -1899,7 +1899,7 @@ mod tests { new_fragments: vec![sample_file], bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - bitmap_preserve_exclude_field_ids: vec![], + bitmap_preserve_field_ids: vec![], update_mode: None, }, ]; @@ -2020,7 +2020,7 @@ mod tests { new_fragments: vec![sample_file.clone()], bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - bitmap_preserve_exclude_field_ids: vec![], + bitmap_preserve_field_ids: vec![], update_mode: None, }, ), @@ -2032,7 +2032,7 @@ mod tests { new_fragments: vec![sample_file.clone()], bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - bitmap_preserve_exclude_field_ids: vec![], + bitmap_preserve_field_ids: vec![], update_mode: None, }, ), @@ -2190,7 +2190,7 @@ mod tests { new_fragments: vec![fragment2.clone()], bitmap_prune_field_ids: vec![0], mem_wal_to_merge: None, - bitmap_preserve_exclude_field_ids: vec![], + bitmap_preserve_field_ids: vec![], update_mode: None, }, create_update_config_for_test( @@ -2385,7 +2385,7 @@ mod tests { new_fragments: vec![fragment2], bitmap_prune_field_ids: vec![0], mem_wal_to_merge: None, - bitmap_preserve_exclude_field_ids: vec![], + bitmap_preserve_field_ids: vec![], update_mode: None, }, [ From f930ae890e4a2a5c29e0e866e126bc79198594c4 Mon Sep 17 00:00:00 2001 From: yanghua Date: Tue, 30 Sep 2025 11:59:02 +0800 Subject: [PATCH 07/17] fix format issue --- python/src/transaction.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/src/transaction.rs b/python/src/transaction.rs index c029863a4ae..4c729af87ac 100644 --- a/python/src/transaction.rs +++ b/python/src/transaction.rs @@ -376,8 +376,7 @@ impl<'py> IntoPyObject<'py> for PyLance<&Operation> { let updated_fragments = export_vec(py, updated_fragments.as_slice())?; let new_fragments = export_vec(py, new_fragments.as_slice())?; let bitmap_prune_field_ids = bitmap_prune_field_ids.into_pyobject(py)?; - let bitmap_preserve_field_ids = - bitmap_preserve_field_ids.into_pyobject(py)?; + let bitmap_preserve_field_ids = bitmap_preserve_field_ids.into_pyobject(py)?; let update_mode = match update_mode { Some(mode) => match mode { lance::dataset::transaction::UpdateMode::RewriteRows => "rewrite_rows", From 8d9641ec37dafcd3b3d2e46f88b67c0c2c58f571 Mon Sep 17 00:00:00 2001 From: yanghua Date: Tue, 30 Sep 2025 14:03:45 +0800 Subject: [PATCH 08/17] refactor code --- rust/lance/src/dataset/transaction.rs | 3 ++- rust/lance/src/dataset/write/merge_insert/exec/write.rs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/rust/lance/src/dataset/transaction.rs b/rust/lance/src/dataset/transaction.rs index edf99b95620..876e7a6c27e 100644 --- a/rust/lance/src/dataset/transaction.rs +++ b/rust/lance/src/dataset/transaction.rs @@ -245,7 +245,8 @@ pub enum Operation { /// A horizontal update adds new columns. In this case, the updated fragments /// may have fields removed or added. It is even possible for a field to be tombstoned /// and then added back in the same update. (which is a field modification). - /// Any fields modified in this way must be added to the bitmap_prune_field_ids list so indices that cover those fields can prune fragment bitmaps accordingly. + /// Any fields modified in this way must be added to the bitmap_prune_field_ids list so + /// indices that cover those fields can prune fragment bitmaps accordingly. /// This ensures indices remain consistent after updates. /// This is what is used by a merge insert that does not match the whole schema. Update { diff --git a/rust/lance/src/dataset/write/merge_insert/exec/write.rs b/rust/lance/src/dataset/write/merge_insert/exec/write.rs index 338fb829ed3..0b0369349f2 100644 --- a/rust/lance/src/dataset/write/merge_insert/exec/write.rs +++ b/rust/lance/src/dataset/write/merge_insert/exec/write.rs @@ -882,7 +882,7 @@ impl ExecutionPlan for FullSchemaMergeInsertExec { removed_fragment_ids, updated_fragments, new_fragments, - bitmap_prune_field_ids: vec![], // No schema fields are modified for upsert + bitmap_prune_field_ids: vec![], mem_wal_to_merge, bitmap_preserve_field_ids: dataset .schema() From 0fb8d222a4f4cf7a4ac04c2724d14fb606783bb8 Mon Sep 17 00:00:00 2001 From: yanghua Date: Wed, 29 Oct 2025 08:09:46 +0800 Subject: [PATCH 09/17] recover old name for pb --- protos/transaction.proto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protos/transaction.proto b/protos/transaction.proto index c912f5fb047..d2c39f9bd00 100644 --- a/protos/transaction.proto +++ b/protos/transaction.proto @@ -194,13 +194,13 @@ message Transaction { // The new fragments where updated rows have been moved to. repeated DataFragment new_fragments = 3; /// Field IDs that drive index fragment bitmap pruning - repeated uint32 bitmap_prune_field_ids = 4; + repeated uint32 fields_modified = 4; /// The MemWAL (pre-image) that should be marked as merged after this transaction MemWalIndexDetails.MemWal mem_wal_to_merge = 5; /// Field IDs used to decide whether to preserve new fragment IDs in the /// fragment bitmap of specified indices. Indices that do not cover these /// fields may preserve the new fragment IDs when applicable. - repeated uint32 bitmap_preserve_field_ids = 6; + repeated uint32 fields_for_preserving_frag_bitmap = 6; // The mode of update UpdateMode update_mode = 7; } From 298bcbdf6f00f310f49ee34bfcafb9455e6f6ef2 Mon Sep 17 00:00:00 2001 From: yanghua Date: Wed, 29 Oct 2025 08:37:25 +0800 Subject: [PATCH 10/17] recover old name for pb --- python/python/lance/dataset.py | 1 + rust/lance/src/dataset/fragment.rs | 8 ++++---- rust/lance/src/dataset/transaction.rs | 8 ++++---- rust/lance/src/dataset/write/merge_insert.rs | 13 +++++++------ rust/lance/src/io/commit/conflict_resolver.rs | 4 ++-- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 3736cdc2366..375c98bb552 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -3934,6 +3934,7 @@ class Update(BaseOperation): Indices that do not cover these fields may preserve the new fragment IDs when applicable. """ + removed_fragment_ids: List[int] = dataclasses.field(default_factory=list) updated_fragments: List[FragmentMetadata] = dataclasses.field( default_factory=list diff --git a/rust/lance/src/dataset/fragment.rs b/rust/lance/src/dataset/fragment.rs index 1544bc3583a..14ac46f437f 100644 --- a/rust/lance/src/dataset/fragment.rs +++ b/rust/lance/src/dataset/fragment.rs @@ -2829,9 +2829,9 @@ mod tests { removed_fragment_ids: vec![], updated_fragments: vec![updated_fragment1], new_fragments: vec![], - fields_modified: fields_modified1, + bitmap_prune_field_ids: fields_modified1, mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap: vec![], + bitmap_preserve_field_ids: vec![], update_mode: Some(UpdateMode::RewriteColumns), }; let mut dataset1 = Dataset::commit( @@ -2901,9 +2901,9 @@ mod tests { removed_fragment_ids: vec![], updated_fragments: vec![updated_fragment2], new_fragments: vec![], - fields_modified: fields_modified2, + bitmap_prune_field_ids: fields_modified2, mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap: vec![], + bitmap_preserve_field_ids: vec![], update_mode: Some(UpdateMode::RewriteColumns), }; let dataset2 = Dataset::commit( diff --git a/rust/lance/src/dataset/transaction.rs b/rust/lance/src/dataset/transaction.rs index 876e7a6c27e..3ebb845e811 100644 --- a/rust/lance/src/dataset/transaction.rs +++ b/rust/lance/src/dataset/transaction.rs @@ -2897,9 +2897,9 @@ impl TryFrom for Transaction { removed_fragment_ids, updated_fragments, new_fragments, - bitmap_prune_field_ids, + fields_modified: bitmap_prune_field_ids, mem_wal_to_merge, - bitmap_preserve_field_ids, + fields_for_preserving_frag_bitmap: bitmap_preserve_field_ids, update_mode, })) => Operation::Update { removed_fragment_ids, @@ -3277,9 +3277,9 @@ impl From<&Transaction> for pb::Transaction { .map(pb::DataFragment::from) .collect(), new_fragments: new_fragments.iter().map(pb::DataFragment::from).collect(), - bitmap_prune_field_ids: bitmap_prune_field_ids.clone(), + fields_modified: bitmap_prune_field_ids.clone(), mem_wal_to_merge: mem_wal_to_merge.as_ref().map(|m| m.into()), - bitmap_preserve_field_ids: bitmap_preserve_field_ids.clone(), + fields_for_preserving_frag_bitmap: bitmap_preserve_field_ids.clone(), update_mode: update_mode .as_ref() .map(|mode| match mode { diff --git a/rust/lance/src/dataset/write/merge_insert.rs b/rust/lance/src/dataset/write/merge_insert.rs index f40c3457362..4796a5ee32b 100644 --- a/rust/lance/src/dataset/write/merge_insert.rs +++ b/rust/lance/src/dataset/write/merge_insert.rs @@ -1467,12 +1467,13 @@ impl MergeInsertJob { // We will have a different commit path here too, as we are modifying // fragments rather than writing new ones - let (updated_fragments, new_fragments, bitmap_prune_field_ids) = Self::update_fragments( - self.dataset.clone(), - Box::pin(stream), - self.dataset.manifest.version + 1, - ) - .await?; + let (updated_fragments, new_fragments, bitmap_prune_field_ids) = + Self::update_fragments( + self.dataset.clone(), + Box::pin(stream), + self.dataset.manifest.version + 1, + ) + .await?; let operation = Operation::Update { removed_fragment_ids: Vec::new(), diff --git a/rust/lance/src/io/commit/conflict_resolver.rs b/rust/lance/src/io/commit/conflict_resolver.rs index ff6846b58ab..5bacb167b0a 100644 --- a/rust/lance/src/io/commit/conflict_resolver.rs +++ b/rust/lance/src/io/commit/conflict_resolver.rs @@ -2805,9 +2805,9 @@ mod tests { updated_fragments: vec![Fragment::new(0)], removed_fragment_ids: vec![], new_fragments: vec![], - fields_modified: vec![], + bitmap_prune_field_ids: vec![], mem_wal_to_merge: None, - fields_for_preserving_frag_bitmap: vec![], + bitmap_preserve_field_ids: vec![], update_mode: None, }, ]; From c3d61579364dd3d08b85a4e5535c9c6ca819ee75 Mon Sep 17 00:00:00 2001 From: yanghua Date: Wed, 29 Oct 2025 09:03:17 +0800 Subject: [PATCH 11/17] recover old name for pb --- python/python/lance/fragment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/python/lance/fragment.py b/python/python/lance/fragment.py index d08167fcb52..48f8b833c55 100644 --- a/python/python/lance/fragment.py +++ b/python/python/lance/fragment.py @@ -680,7 +680,7 @@ def update_columns( ... }) >>> # Update the fragment >>> fragment = dataset.get_fragment(0) - >>> updated_fragment, fields_modified = fragment.update_columns( + >>> updated_fragment, bitmap_prune_field_ids = fragment.update_columns( ... update_data, ... left_on="id", ... right_on="id" @@ -689,7 +689,7 @@ def update_columns( >>> from lance import LanceOperation >>> op = LanceOperation.Update( ... updated_fragments=[updated_fragment], - ... fields_modified=fields_modified, + ... bitmap_prune_field_ids=bitmap_prune_field_ids, ... ) >>> dataset = lance.LanceDataset.commit( ... "dataset", From 6b4302e532f19c7072002749624402aaa80fc25e Mon Sep 17 00:00:00 2001 From: yanghua Date: Wed, 29 Oct 2025 10:35:39 +0800 Subject: [PATCH 12/17] recover old name for pb --- python/python/lance/dataset.py | 4 +--- python/python/tests/test_fragment.py | 10 +++++----- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 375c98bb552..bd7509a4f34 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -3941,9 +3941,7 @@ class Update(BaseOperation): ) new_fragments: List[FragmentMetadata] = dataclasses.field(default_factory=list) bitmap_prune_field_ids: List[int] = dataclasses.field(default_factory=list) - bitmap_preserve_exclude_field_ids: List[int] = dataclasses.field( - default_factory=list - ) + bitmap_preserve_field_ids: List[int] = dataclasses.field(default_factory=list) update_mode: str = "" def __post_init__(self): diff --git a/python/python/tests/test_fragment.py b/python/python/tests/test_fragment.py index 40e2a69df22..5647abb4a3a 100644 --- a/python/python/tests/test_fragment.py +++ b/python/python/tests/test_fragment.py @@ -550,7 +550,7 @@ def test_fragment_update_columns_basic(tmp_path): op = LanceOperation.Update( updated_fragments=[updated_fragment], - fields_modified=fields_modified, + bitmap_prune_field_ids=fields_modified, ) updated_dataset = lance.LanceDataset.commit( str(dataset_uri), op, read_version=dataset.version @@ -596,7 +596,7 @@ def test_fragment_update_columns_with_custom_join_key(tmp_path): op = LanceOperation.Update( updated_fragments=[updated_fragment], - fields_modified=fields_modified, + bitmap_prune_field_ids=fields_modified, ) updated_dataset = lance.LanceDataset.commit( str(dataset_uri), op, read_version=dataset.version @@ -639,7 +639,7 @@ def test_fragment_update_columns_with_nulls(tmp_path): op = LanceOperation.Update( updated_fragments=[updated_fragment], - fields_modified=fields_modified, + bitmap_prune_field_ids=fields_modified, ) updated_dataset = lance.LanceDataset.commit( str(dataset_uri), op, read_version=dataset.version @@ -680,7 +680,7 @@ def test_fragment_update_columns_partial_update(tmp_path): op = LanceOperation.Update( updated_fragments=[updated_fragment], - fields_modified=fields_modified, + bitmap_prune_field_ids=fields_modified, ) updated_dataset = lance.LanceDataset.commit( str(dataset_uri), op, read_version=dataset.version @@ -723,7 +723,7 @@ def test_fragment_update_columns_no_match(tmp_path): op = LanceOperation.Update( updated_fragments=[updated_fragment], - fields_modified=fields_modified, + bitmap_prune_field_ids=fields_modified, ) updated_dataset = lance.LanceDataset.commit( str(dataset_uri), op, read_version=dataset.version From cd35006844a4ec864f80337d56345d27eabb3b31 Mon Sep 17 00:00:00 2001 From: yanghua Date: Wed, 29 Oct 2025 11:11:16 +0800 Subject: [PATCH 13/17] fix java issues --- java/lance-jni/src/fragment.rs | 4 ++-- .../lancedb/lance/fragment/FragmentUpdateResult.java | 12 ++++++------ .../java/com/lancedb/lance/operation/UpdateTest.java | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/java/lance-jni/src/fragment.rs b/java/lance-jni/src/fragment.rs index 98be13a3e44..6820654d1d0 100644 --- a/java/lance-jni/src/fragment.rs +++ b/java/lance-jni/src/fragment.rs @@ -412,11 +412,11 @@ fn inner_update_column<'local>( let reader = unsafe { ArrowArrayStreamReader::from_raw(stream_ptr) }?; let left_on_str: String = left_on.extract(env)?; let right_on_str: String = right_on.extract(env)?; - let (updated_fragment, fields_modified) = + let (updated_fragment, bitmap_prune_field_ids) = RT.block_on(fragment.update_columns(reader, &left_on_str, &right_on_str))?; let result = FragmentUpdateResult { updated_fragment, - fields_modified, + bitmap_prune_field_ids, }; result.into_java(env) } diff --git a/java/src/main/java/com/lancedb/lance/fragment/FragmentUpdateResult.java b/java/src/main/java/com/lancedb/lance/fragment/FragmentUpdateResult.java index 1769c145cd9..56111bbc7a3 100644 --- a/java/src/main/java/com/lancedb/lance/fragment/FragmentUpdateResult.java +++ b/java/src/main/java/com/lancedb/lance/fragment/FragmentUpdateResult.java @@ -24,26 +24,26 @@ */ public class FragmentUpdateResult { private final FragmentMetadata updatedFragment; - private final long[] fieldsModified; + private final long[] bitmapPruneFieldIds; - public FragmentUpdateResult(FragmentMetadata updatedFragment, long[] updatedFieldIds) { + public FragmentUpdateResult(FragmentMetadata updatedFragment, long[] bitmapPruneFieldIds) { this.updatedFragment = updatedFragment; - this.fieldsModified = updatedFieldIds; + this.bitmapPruneFieldIds = bitmapPruneFieldIds; } public FragmentMetadata getUpdatedFragment() { return updatedFragment; } - public long[] getFieldsModified() { - return fieldsModified; + public long[] getBitmapPruneFieldIds() { + return bitmapPruneFieldIds; } @Override public String toString() { return MoreObjects.toStringHelper(this) .add("fragmentMetadata", updatedFragment) - .add("updatedFieldIds", fieldsModified) + .add("bitmapPruneFieldIds", bitmapPruneFieldIds) .toString(); } } diff --git a/java/src/test/java/com/lancedb/lance/operation/UpdateTest.java b/java/src/test/java/com/lancedb/lance/operation/UpdateTest.java index 63cb58ebaa0..da5ea8f9b6f 100644 --- a/java/src/test/java/com/lancedb/lance/operation/UpdateTest.java +++ b/java/src/test/java/com/lancedb/lance/operation/UpdateTest.java @@ -152,7 +152,7 @@ void testUpdateColumns(@TempDir Path tempDir) throws Exception { Update.builder() .updatedFragments( Collections.singletonList(updateResult.getUpdatedFragment())) - .fieldsModified(updateResult.getFieldsModified()) + .bitmapPruneFieldIds(updateResult.getBitmapPruneFieldIds()) .build()) .build(); try (Dataset dataset = updateTransaction.commit()) { From f52441eceab5335a2e059552ea47352a51e36034 Mon Sep 17 00:00:00 2001 From: yanghua Date: Wed, 29 Oct 2025 11:39:08 +0800 Subject: [PATCH 14/17] fix code style issue --- java/lance-jni/src/fragment.rs | 6 +++--- python/python/lance/dataset.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/java/lance-jni/src/fragment.rs b/java/lance-jni/src/fragment.rs index 6820654d1d0..7abfec093c8 100644 --- a/java/lance-jni/src/fragment.rs +++ b/java/lance-jni/src/fragment.rs @@ -37,7 +37,7 @@ pub(crate) struct FragmentMergeResult { #[derive(Debug, Clone)] pub(crate) struct FragmentUpdateResult { updated_fragment: Fragment, - fields_modified: Vec, + bitmap_prune_field_ids: Vec, } ////////////////// @@ -456,13 +456,13 @@ impl IntoJava for &FragmentMergeResult { impl IntoJava for &FragmentUpdateResult { fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> Result> { let java_updated_fragment = self.updated_fragment.into_java(env)?; - let java_fields_modified = JLance(self.fields_modified.clone()).into_java(env)?; + let java_bitmap_prune_field_ids = JLance(self.bitmap_prune_field_ids.clone()).into_java(env)?; Ok(env.new_object( FRAGMENT_UPDATE_RESULT_CLASS, FRAGMENT_UPDATE_RESULT_CONSTRUCTOR_SIG, &[ JValueGen::Object(&java_updated_fragment), - JValueGen::Object(&java_fields_modified), + JValueGen::Object(&java_bitmap_prune_field_ids), ], )?) } diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index bd7509a4f34..595a828bd24 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -3984,7 +3984,7 @@ class Merge(BaseOperation): 0 1 a 1 2 b 2 3 c - 3 4 dpython/python/lance/dataset.py + 3 4 d >>> def double_a(batch: pa.RecordBatch) -> pa.RecordBatch: ... doubled = pc.multiply(batch["a"], 2) ... return pa.record_batch([doubled], ["a_doubled"]) From 4c22d0dfec59261b1b0baa6204b457e300d2c238 Mon Sep 17 00:00:00 2001 From: yanghua Date: Wed, 29 Oct 2025 13:04:04 +0800 Subject: [PATCH 15/17] fix code style issue --- java/lance-jni/src/fragment.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/java/lance-jni/src/fragment.rs b/java/lance-jni/src/fragment.rs index 7abfec093c8..5b8cc2e9f36 100644 --- a/java/lance-jni/src/fragment.rs +++ b/java/lance-jni/src/fragment.rs @@ -456,7 +456,8 @@ impl IntoJava for &FragmentMergeResult { impl IntoJava for &FragmentUpdateResult { fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> Result> { let java_updated_fragment = self.updated_fragment.into_java(env)?; - let java_bitmap_prune_field_ids = JLance(self.bitmap_prune_field_ids.clone()).into_java(env)?; + let java_bitmap_prune_field_ids = + JLance(self.bitmap_prune_field_ids.clone()).into_java(env)?; Ok(env.new_object( FRAGMENT_UPDATE_RESULT_CLASS, FRAGMENT_UPDATE_RESULT_CONSTRUCTOR_SIG, From c8a39cd08ee566e8ba5c73778a55cb52b824f3df Mon Sep 17 00:00:00 2001 From: yanghua Date: Wed, 29 Oct 2025 14:56:35 +0800 Subject: [PATCH 16/17] refactor code --- python/python/tests/test_fragment.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/python/python/tests/test_fragment.py b/python/python/tests/test_fragment.py index 5647abb4a3a..9395f70b14f 100644 --- a/python/python/tests/test_fragment.py +++ b/python/python/tests/test_fragment.py @@ -540,17 +540,17 @@ def test_fragment_update_columns_basic(tmp_path): # Get the fragment and update columns fragment = dataset.get_fragment(0) - updated_fragment, fields_modified = fragment.update_columns(update_data) + updated_fragment, bitmap_prune_field_ids = fragment.update_columns(update_data) # Verify fields_modified is returned - assert isinstance(fields_modified, list) - assert len(fields_modified) > 0 + assert isinstance(bitmap_prune_field_ids, list) + assert len(bitmap_prune_field_ids) > 0 # Commit the changes using Update operation op = LanceOperation.Update( updated_fragments=[updated_fragment], - bitmap_prune_field_ids=fields_modified, + bitmap_prune_field_ids=bitmap_prune_field_ids, ) updated_dataset = lance.LanceDataset.commit( str(dataset_uri), op, read_version=dataset.version @@ -588,7 +588,7 @@ def test_fragment_update_columns_with_custom_join_key(tmp_path): # Get the fragment and update columns fragment = dataset.get_fragment(0) - updated_fragment, fields_modified = fragment.update_columns( + updated_fragment, bitmap_prune_field_ids = fragment.update_columns( update_data, left_on="id", right_on="id" ) @@ -596,7 +596,7 @@ def test_fragment_update_columns_with_custom_join_key(tmp_path): op = LanceOperation.Update( updated_fragments=[updated_fragment], - bitmap_prune_field_ids=fields_modified, + bitmap_prune_field_ids=bitmap_prune_field_ids, ) updated_dataset = lance.LanceDataset.commit( str(dataset_uri), op, read_version=dataset.version @@ -633,13 +633,13 @@ def test_fragment_update_columns_with_nulls(tmp_path): # Get the fragment and update columns fragment = dataset.get_fragment(0) - updated_fragment, fields_modified = fragment.update_columns(update_data) + updated_fragment, bitmap_prune_field_ids = fragment.update_columns(update_data) # Commit the changes op = LanceOperation.Update( updated_fragments=[updated_fragment], - bitmap_prune_field_ids=fields_modified, + bitmap_prune_field_ids=bitmap_prune_field_ids, ) updated_dataset = lance.LanceDataset.commit( str(dataset_uri), op, read_version=dataset.version @@ -674,13 +674,13 @@ def test_fragment_update_columns_partial_update(tmp_path): # Get the fragment and update columns fragment = dataset.get_fragment(0) - updated_fragment, fields_modified = fragment.update_columns(update_data) + updated_fragment, bitmap_prune_field_ids = fragment.update_columns(update_data) # Commit the changes op = LanceOperation.Update( updated_fragments=[updated_fragment], - bitmap_prune_field_ids=fields_modified, + bitmap_prune_field_ids=bitmap_prune_field_ids, ) updated_dataset = lance.LanceDataset.commit( str(dataset_uri), op, read_version=dataset.version @@ -717,13 +717,13 @@ def test_fragment_update_columns_no_match(tmp_path): # Get the fragment and update columns fragment = dataset.get_fragment(0) - updated_fragment, fields_modified = fragment.update_columns(update_data) + updated_fragment, bitmap_prune_field_ids = fragment.update_columns(update_data) # Commit the changes op = LanceOperation.Update( updated_fragments=[updated_fragment], - bitmap_prune_field_ids=fields_modified, + bitmap_prune_field_ids=bitmap_prune_field_ids, ) updated_dataset = lance.LanceDataset.commit( str(dataset_uri), op, read_version=dataset.version From 5be50bde4dce699e0896d3102b3fd6a4f342621a Mon Sep 17 00:00:00 2001 From: yanghua Date: Wed, 29 Oct 2025 14:56:44 +0800 Subject: [PATCH 17/17] refactor code --- python/python/tests/test_fragment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/python/tests/test_fragment.py b/python/python/tests/test_fragment.py index 9395f70b14f..fef7c1c7eff 100644 --- a/python/python/tests/test_fragment.py +++ b/python/python/tests/test_fragment.py @@ -542,7 +542,7 @@ def test_fragment_update_columns_basic(tmp_path): fragment = dataset.get_fragment(0) updated_fragment, bitmap_prune_field_ids = fragment.update_columns(update_data) - # Verify fields_modified is returned + # Verify bitmap_prune_field_ids is returned assert isinstance(bitmap_prune_field_ids, list) assert len(bitmap_prune_field_ids) > 0