From 2dfe9ee1b16f493c23c62e73f9fcc1ee05fcc48d Mon Sep 17 00:00:00 2001 From: yanghua Date: Fri, 30 May 2025 09:31:40 +0800 Subject: [PATCH 1/2] feat(java): support checkout version --- java/core/lance-jni/src/blocking_dataset.rs | 26 +++++++++++++++ .../main/java/com/lancedb/lance/Dataset.java | 25 ++++++++++++++ .../java/com/lancedb/lance/DatasetTest.java | 33 +++++++++++++++++++ 3 files changed, 84 insertions(+) diff --git a/java/core/lance-jni/src/blocking_dataset.rs b/java/core/lance-jni/src/blocking_dataset.rs index a3e7def71ac..7382e96b7a6 100644 --- a/java/core/lance-jni/src/blocking_dataset.rs +++ b/java/core/lance-jni/src/blocking_dataset.rs @@ -685,6 +685,32 @@ fn inner_latest_version(env: &mut JNIEnv, java_dataset: JObject) -> Result dataset_guard.latest_version() } +#[no_mangle] +pub extern "system" fn Java_com_lancedb_lance_Dataset_nativeCheckoutVersion<'local>( + mut env: JNIEnv<'local>, + java_dataset: JObject, + version: jlong, +) -> JObject<'local> { + ok_or_throw!(env, inner_checkout_version(&mut env, java_dataset, version)) +} + +fn inner_checkout_version<'local>( + env: &mut JNIEnv<'local>, + java_dataset: JObject, + version: jlong, +) -> Result<JObject<'local>> { + let new_dataset = { + let dataset_guard = + unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?; + + let version_u64 = version as u64; + RT.block_on(dataset_guard.inner.checkout_version(version_u64))? 
+ }; + + let blocking_dataset = BlockingDataset { inner: new_dataset }; + blocking_dataset.into_java(env) +} + #[no_mangle] pub extern "system" fn Java_com_lancedb_lance_Dataset_nativeCountRows( mut env: JNIEnv, diff --git a/java/core/src/main/java/com/lancedb/lance/Dataset.java b/java/core/src/main/java/com/lancedb/lance/Dataset.java index 9cb6d27ccf4..e4de13d01d0 100644 --- a/java/core/src/main/java/com/lancedb/lance/Dataset.java +++ b/java/core/src/main/java/com/lancedb/lance/Dataset.java @@ -472,6 +472,31 @@ public long latestVersion() { private native long nativeLatestVersion(); + /** + * Checks out a specific version of the dataset. If the version is already checked out, it returns + * the current instance. + * + * @param version the version to check out + * @return this instance if the version is already checked out, otherwise a new Dataset instance + */ + public Dataset checkoutVersion(long version) { + if (version < 1) { + throw new IllegalArgumentException("Version must be greater than 0"); + } + + try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) { + Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed"); + + if (this.version() == version) { + return this; + } + + return nativeCheckoutVersion(version); + } + } + + private native Dataset nativeCheckoutVersion(long version); + /** * Creates a new index on the dataset. Only vector indexes are supported. 
* diff --git a/java/core/src/test/java/com/lancedb/lance/DatasetTest.java b/java/core/src/test/java/com/lancedb/lance/DatasetTest.java index f624d7c3227..7e0921256fe 100644 --- a/java/core/src/test/java/com/lancedb/lance/DatasetTest.java +++ b/java/core/src/test/java/com/lancedb/lance/DatasetTest.java @@ -155,6 +155,39 @@ void testDatasetVersion() { } } + @Test + void testDatasetCheckoutVersion() { + String datasetPath = tempDir.resolve("dataset_checkout_version").toString(); + try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { + TestUtils.SimpleTestDataset testDataset = + new TestUtils.SimpleTestDataset(allocator, datasetPath); + + // version 1, empty dataset + try (Dataset dataset = testDataset.createEmptyDataset()) { + assertEquals(1, dataset.version()); + assertEquals(1, dataset.latestVersion()); + assertEquals(0, dataset.countRows()); + } + + // write first batch of data, version 2 + try (Dataset dataset2 = testDataset.write(1, 5)) { + assertEquals(1, dataset.version()); + assertEquals(2, dataset.latestVersion()); + assertEquals(0, dataset.countRows()); + assertEquals(2, dataset2.version()); + assertEquals(2, dataset2.latestVersion()); + assertEquals(5, dataset2.countRows()); + } + + // checkout the dataset at version 1 + try (Dataset checkoutV1 = dataset.checkoutVersion(1)) { + assertEquals(1, checkoutV1.version()); + assertEquals(2, checkoutV1.latestVersion()); + assertEquals(0, checkoutV1.countRows()); + } + } + } + @Test void testDatasetUri() { String datasetPath = tempDir.resolve("dataset_uri").toString(); From 080ab334b4d70953a8d7c98aa0cf6e242769f7a3 Mon Sep 17 00:00:00 2001 From: yanghua Date: Fri, 30 May 2025 09:42:33 +0800 Subject: [PATCH 2/2] feat(java): support checkout version --- .../test/java/com/lancedb/lance/DatasetTest.java | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/java/core/src/test/java/com/lancedb/lance/DatasetTest.java b/java/core/src/test/java/com/lancedb/lance/DatasetTest.java 
index 7e0921256fe..46434121055 100644 --- a/java/core/src/test/java/com/lancedb/lance/DatasetTest.java +++ b/java/core/src/test/java/com/lancedb/lance/DatasetTest.java @@ -171,19 +171,16 @@ void testDatasetCheckoutVersion() { // write first batch of data, version 2 try (Dataset dataset2 = testDataset.write(1, 5)) { - assertEquals(1, dataset.version()); - assertEquals(2, dataset.latestVersion()); - assertEquals(0, dataset.countRows()); assertEquals(2, dataset2.version()); assertEquals(2, dataset2.latestVersion()); assertEquals(5, dataset2.countRows()); - } - // checkout the dataset at version 1 - try (Dataset checkoutV1 = dataset.checkoutVersion(1)) { - assertEquals(1, checkoutV1.version()); - assertEquals(2, checkoutV1.latestVersion()); - assertEquals(0, checkoutV1.countRows()); + // checkout the dataset at version 1 + try (Dataset checkoutV1 = dataset2.checkoutVersion(1)) { + assertEquals(1, checkoutV1.version()); + assertEquals(2, checkoutV1.latestVersion()); + assertEquals(0, checkoutV1.countRows()); + } } } }