From 41ac2e98ac54109cdf32d0f72907b725b4c7101b Mon Sep 17 00:00:00 2001 From: YueZhang Date: Wed, 31 Dec 2025 14:10:41 +0800 Subject: [PATCH 1/7] feat:support truncate table api --- java/lance-jni/src/blocking_dataset.rs | 15 +++++ java/src/main/java/org/lance/Dataset.java | 13 ++++ .../org/lance/operation/TruncateTest.java | 62 +++++++++++++++++++ python/python/lance/dataset.py | 8 +++ python/python/tests/test_dataset.py | 18 ++++++ python/src/dataset.rs | 8 +++ rust/lance/src/dataset.rs | 14 +++++ rust/lance/src/dataset/tests/dataset_io.rs | 29 +++++++++ 8 files changed, 167 insertions(+) create mode 100644 java/src/test/java/org/lance/operation/TruncateTest.java diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index 55b108dd9cc..b2362d9ff70 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -1616,6 +1616,21 @@ fn inner_delete(env: &mut JNIEnv, java_dataset: JObject, predicate: JString) -> Ok(()) } +#[no_mangle] +pub extern "system" fn Java_org_lance_Dataset_nativeTruncateTable( + mut env: JNIEnv, + java_dataset: JObject, +) { + ok_or_throw_without_return!(env, inner_truncate_table(&mut env, java_dataset)) +} + +fn inner_truncate_table(env: &mut JNIEnv, java_dataset: JObject) -> Result<()> { + let mut dataset_guard = + unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?; + RT.block_on(dataset_guard.inner.truncate_table())?; + Ok(()) +} + ////////////////////////////// // Schema evolution Methods // ////////////////////////////// diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index 76e854afe64..7903b782717 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -636,6 +636,19 @@ public void delete(String predicate) { private native void nativeDelete(String predicate); + /** + * Truncate the dataset by committing an empty manifest using overwrite mode. + * Preserves the schema and creates a new version without fragments. + */ + public void truncateTable() { + try (LockManager.WriteLock writeLock = lockManager.acquireWriteLock()) { + Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed"); + nativeTruncateTable(); + } + } + + private native void nativeTruncateTable(); + /** * Gets the URI of the dataset. * diff --git a/java/src/test/java/org/lance/operation/TruncateTest.java b/java/src/test/java/org/lance/operation/TruncateTest.java new file mode 100644 index 00000000000..f6cc9cb3719 --- /dev/null +++ b/java/src/test/java/org/lance/operation/TruncateTest.java @@ -0,0 +1,62 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.operation; + +import org.lance.Dataset; +import org.lance.TestUtils; +import org.lance.FragmentMetadata; +import org.lance.Transaction; + +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TruncateTest extends OperationTestBase { + + @Test + void testTruncateTable(@TempDir Path tempDir) throws Exception { + String datasetPath = tempDir.resolve("testTruncate").toString(); + try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { + TestUtils.SimpleTestDataset testDataset = + new TestUtils.SimpleTestDataset(allocator, datasetPath); + dataset = testDataset.createEmptyDataset(); + + // Append some data + int rowCount = 20; + FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); + Transaction transaction = + dataset + .newTransactionBuilder() + .operation(Append.builder().fragments(java.util.Collections.singletonList(fragmentMeta)).build()) + .build(); + try (Dataset ds1 = transaction.commit()) { + assertEquals(rowCount, ds1.countRows()); + + // Truncate to empty while preserving schema + ds1.truncateTable(); + assertEquals(0, ds1.countRows()); + + try (org.lance.ipc.LanceScanner scanner = ds1.newScan()) { + Schema schemaRes = scanner.schema(); + assertEquals(testDataset.getSchema(), schemaRes); + } + } + } + } +} diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 99f44f9c6ce..ba806ccc35e 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -2007,6 +2007,14 @@ def delete( predicate = str(predicate) self._ds.delete(predicate, conflict_retries, retry_timeout) + def truncate_table(self) -> None: + """ + Truncate the dataset to zero rows by committing an empty manifest. + Preserves the schema and creates a new version without fragments. + """ + self._ds.truncate_table() + self._list_indices_res = None + def insert( self, data: ReaderLike, diff --git a/python/python/tests/test_dataset.py b/python/python/tests/test_dataset.py index 9ddb6db6881..022d1dcd71f 100644 --- a/python/python/tests/test_dataset.py +++ b/python/python/tests/test_dataset.py @@ -108,6 +108,24 @@ def test_dataset_overwrite(tmp_path: Path): assert ds_v1.to_table() == table1 +def test_truncate_table(tmp_path: Path): + base_dir = tmp_path / "truncate" + table = pa.table( + { + "i": pa.array([1, 2, 3], pa.int32()), + "dict": pa.DictionaryArray.from_arrays( + pa.array([0, 1, 2], pa.uint16()), pa.array(["a", "b", "c"]) + ), + } + ) + ds = lance.write_dataset(table, base_dir, data_storage_version="stable") + assert ds.count_rows() == 3 + + ds.truncate_table() + assert ds.count_rows() == 0 + assert ds.schema == table.schema + + def test_dataset_append(tmp_path: Path): table = pa.Table.from_pydict({"colA": [1, 2, 3], "colB": [4, 5, 6]}) base_dir = tmp_path / "test" diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 68031cc3c70..7006354a10d 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -1509,6 +1509,14 @@ impl Dataset { Ok(()) } + fn truncate_table(&mut self) -> PyResult<()> { + let mut new_self = self.ds.as_ref().clone(); + rt().block_on(None, new_self.truncate_table())? + .map_err(|err: lance::Error| PyIOError::new_err(err.to_string()))?; + self.ds = Arc::new(new_self); + Ok(()) + } + /// Cleanup old versions from the dataset #[pyo3(signature = (older_than_micros = None, retain_versions = None, delete_unverified = None, error_if_tagged_old_versions = None))] fn cleanup_old_versions( diff --git a/rust/lance/src/dataset.rs b/rust/lance/src/dataset.rs index 547dfaf8b9e..0410d12e33b 100644 --- a/rust/lance/src/dataset.rs +++ b/rust/lance/src/dataset.rs @@ -1543,6 +1543,20 @@ impl Dataset { info!(target: TRACE_DATASET_EVENTS, event=DATASET_DELETING_EVENT, uri = &self.uri, predicate=predicate); write::delete::delete(self, predicate).await } + + /// Truncate the dataset by committing an empty fragments manifest. + pub async fn truncate_table(&mut self) -> Result<()> { + let op = Operation::Overwrite { + fragments: Vec::new(), + schema: self.schema().clone(), + config_upsert_values: None, + initial_bases: None, + }; + let transaction = Transaction::new(self.manifest.version, op, None); + self.apply_commit(transaction, &Default::default(), &Default::default()) + .await?; + Ok(()) + } /// Add new base paths to the dataset. /// diff --git a/rust/lance/src/dataset/tests/dataset_io.rs b/rust/lance/src/dataset/tests/dataset_io.rs index 0c163de3f96..1a74021f99d 100644 --- a/rust/lance/src/dataset/tests/dataset_io.rs +++ b/rust/lance/src/dataset/tests/dataset_io.rs @@ -40,6 +40,35 @@ use futures::TryStreamExt; use lance_table::io::manifest::read_manifest; use rstest::rstest; +#[tokio::test] +async fn test_truncate_table() { + let tmpdir = tempfile::tempdir().unwrap(); + let path = tmpdir.path(); + create_file(path, WriteMode::Create, LanceFileVersion::V2_2).await; + + let uri = path.to_str().unwrap(); + let mut ds = Dataset::open(uri).await.unwrap(); + let rows_before = ds.count_rows(None).await.unwrap(); + assert!(rows_before > 0); + + ds.truncate_table().await.unwrap(); + + let rows_after = ds.count_rows(None).await.unwrap(); + assert_eq!(rows_after, 0); + assert_eq!(ds.count_fragments(), 0); + + let expected_schema = Arc::new(ArrowSchema::new(vec![ + ArrowField::new("i", DataType::Int32, false), + ArrowField::new( + "dict", + DataType::Dictionary(Box::new(DataType::UInt16), Box::new(DataType::Utf8)), + false, + ), + ])); + let actual_schema = ArrowSchema::from(ds.schema()); + assert_eq!(&actual_schema, expected_schema.as_ref()); +} + #[rstest] #[lance_test_macros::test(tokio::test)] async fn test_create_dataset( From 0affa853e326d8e6642557dee61bd2c69e1806f6 Mon Sep 17 00:00:00 2001 From: YueZhang Date: Wed, 31 Dec 2025 14:25:36 +0800 Subject: [PATCH 2/7] fmt --- rust/lance/src/dataset.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/lance/src/dataset.rs b/rust/lance/src/dataset.rs index 2f283469b18..08e0d37d416 100644 --- a/rust/lance/src/dataset.rs +++ b/rust/lance/src/dataset.rs @@ -1562,7 +1562,7 @@ impl Dataset { info!(target: TRACE_DATASET_EVENTS, event=DATASET_DELETING_EVENT, uri = &self.uri, predicate=predicate); write::delete::delete(self, predicate).await } - + /// Truncate the dataset by committing an empty fragments manifest. pub async fn truncate_table(&mut self) -> Result<()> { let op = Operation::Overwrite { From 05e61d9add93de89dcd327cd9eb121f29e74c733 Mon Sep 17 00:00:00 2001 From: YueZhang Date: Wed, 31 Dec 2025 14:34:54 +0800 Subject: [PATCH 3/7] fmt --- java/src/main/java/org/lance/Dataset.java | 4 ++-- java/src/test/java/org/lance/operation/TruncateTest.java | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index 090f89cbe55..a149457ee66 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -638,8 +638,8 @@ public void delete(String predicate) { private native void nativeDelete(String predicate); /** - * Truncate the dataset by committing an empty manifest using overwrite mode. - * Preserves the schema and creates a new version without fragments. + * Truncate the dataset by committing an empty manifest using overwrite mode. Preserves the schema + * and creates a new version without fragments. */ public void truncateTable() { try (LockManager.WriteLock writeLock = lockManager.acquireWriteLock()) { diff --git a/java/src/test/java/org/lance/operation/TruncateTest.java b/java/src/test/java/org/lance/operation/TruncateTest.java index f6cc9cb3719..93f5b689e8c 100644 --- a/java/src/test/java/org/lance/operation/TruncateTest.java +++ b/java/src/test/java/org/lance/operation/TruncateTest.java @@ -14,8 +14,8 @@ package org.lance.operation; import org.lance.Dataset; -import org.lance.TestUtils; import org.lance.FragmentMetadata; +import org.lance.TestUtils; import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; @@ -43,7 +43,10 @@ void testTruncateTable(@TempDir Path tempDir) throws Exception { Transaction transaction = dataset .newTransactionBuilder() - .operation(Append.builder().fragments(java.util.Collections.singletonList(fragmentMeta)).build()) + .operation( + Append.builder() + .fragments(java.util.Collections.singletonList(fragmentMeta)) + .build()) .build(); try (Dataset ds1 = transaction.commit()) { assertEquals(rowCount, ds1.countRows()); From 371f40a391645642b0658417727dfda73a253213 Mon Sep 17 00:00:00 2001 From: YueZhang Date: Tue, 6 Jan 2026 19:28:59 +0800 Subject: [PATCH 4/7] code review --- java/src/main/java/org/lance/Dataset.java | 3 +-- python/python/lance/dataset.py | 4 ++-- python/src/dataset.rs | 1 + rust/lance/src/dataset.rs | 13 ++----------- 4 files changed, 6 insertions(+), 15 deletions(-) diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index a149457ee66..1097f1f1267 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -638,8 +638,7 @@ public void delete(String predicate) { private native void nativeDelete(String predicate); /** - * Truncate the dataset by committing an empty manifest using overwrite mode. Preserves the schema - * and creates a new version without fragments. + * Truncate the dataset by deleting all rows. The schema is preserved and a new version is created. */ public void truncateTable() { try (LockManager.WriteLock writeLock = lockManager.acquireWriteLock()) { diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 07e293f0876..dcef75ad9a8 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -1988,8 +1988,8 @@ def delete( def truncate_table(self) -> None: """ - Truncate the dataset to zero rows by committing an empty manifest. - Preserves the schema and creates a new version without fragments. + Truncate the dataset to zero rows by deleting all rows. + The schema is preserved and a new version is created. """ self._ds.truncate_table() self._list_indices_res = None diff --git a/python/src/dataset.rs b/python/src/dataset.rs index f5e6dec126e..ff281e5962e 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -1509,6 +1509,7 @@ impl Dataset { Ok(()) } + /// Truncate the dataset by deleting all rows. The schema is preserved and a new version is created. fn truncate_table(&mut self) -> PyResult<()> { let mut new_self = self.ds.as_ref().clone(); rt().block_on(None, new_self.truncate_table())? diff --git a/rust/lance/src/dataset.rs b/rust/lance/src/dataset.rs index 08e0d37d416..f766c31d49c 100644 --- a/rust/lance/src/dataset.rs +++ b/rust/lance/src/dataset.rs @@ -1563,18 +1563,9 @@ impl Dataset { write::delete::delete(self, predicate).await } - /// Truncate the dataset by committing an empty fragments manifest. + /// Truncate the dataset by deleting all rows. pub async fn truncate_table(&mut self) -> Result<()> { - let op = Operation::Overwrite { - fragments: Vec::new(), - schema: self.schema().clone(), - config_upsert_values: None, - initial_bases: None, - }; - let transaction = Transaction::new(self.manifest.version, op, None); - self.apply_commit(transaction, &Default::default(), &Default::default()) - .await?; - Ok(()) + self.delete("true").await } /// Add new base paths to the dataset. From 1416a6d4b7475f47e9466ba11e79f231c1b4fffa Mon Sep 17 00:00:00 2001 From: YueZhang Date: Tue, 6 Jan 2026 19:30:19 +0800 Subject: [PATCH 5/7] code review --- python/python/lance/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index dcef75ad9a8..c10dc1a214b 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -1988,7 +1988,7 @@ def delete( def truncate_table(self) -> None: """ - Truncate the dataset to zero rows by deleting all rows. + Truncate the dataset by deleting all rows. The schema is preserved and a new version is created. """ self._ds.truncate_table() From 43eb27e9ef26f44e2e2b950b67c6a131e0efec20 Mon Sep 17 00:00:00 2001 From: YueZhang Date: Tue, 6 Jan 2026 19:35:22 +0800 Subject: [PATCH 6/7] code review --- java/src/main/java/org/lance/Dataset.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index 1097f1f1267..8f9e5463e8d 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -638,7 +638,8 @@ public void delete(String predicate) { private native void nativeDelete(String predicate); /** - * Truncate the dataset by deleting all rows. The schema is preserved and a new version is created. + * Truncate the dataset by deleting all rows. The schema is preserved and a new version is + * created. */ public void truncateTable() { try (LockManager.WriteLock writeLock = lockManager.acquireWriteLock()) { From 4c12a92420794f5dd6448e80071c45858b8d4baf Mon Sep 17 00:00:00 2001 From: YueZhang Date: Tue, 6 Jan 2026 19:51:30 +0800 Subject: [PATCH 7/7] code review --- python/src/dataset.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/src/dataset.rs b/python/src/dataset.rs index ff281e5962e..d2dd2e1fea4 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -1509,7 +1509,7 @@ impl Dataset { Ok(()) } - /// Truncate the dataset by deleting all rows. The schema is preserved and a new version is created. + /// Truncate the dataset by deleting all rows. The schema is preserved and a new version is created. fn truncate_table(&mut self) -> PyResult<()> { let mut new_self = self.ds.as_ref().clone(); rt().block_on(None, new_self.truncate_table())?