Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions java/core/lance-jni/src/blocking_dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,19 @@ impl BlockingDataset {
Ok(())
}

pub fn replace_schema_metadata(&mut self, metadata: HashMap<String, String>) -> Result<()> {
RT.block_on(self.inner.replace_schema_metadata(metadata))?;
Ok(())
}

pub fn replace_field_metadata(
&mut self,
metadata_map: HashMap<u32, HashMap<String, String>>,
) -> Result<()> {
RT.block_on(self.inner.replace_field_metadata(metadata_map))?;
Ok(())
}

pub fn close(&self) {}
}

Expand Down Expand Up @@ -1603,3 +1616,61 @@ fn inner_get_version_by_tag(
{ unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }? };
dataset_guard.get_version(tag.as_str())
}

#[no_mangle]
pub extern "system" fn Java_com_lancedb_lance_Dataset_nativeReplaceSchemaMetadata(
mut env: JNIEnv,
java_dataset: JObject,
jschema_metadata: JObject,
) {
ok_or_throw_without_return!(
env,
inner_replace_schema_metadata(&mut env, java_dataset, jschema_metadata)
)
}

fn inner_replace_schema_metadata(
env: &mut JNIEnv,
java_dataset: JObject,
jschema_metadata: JObject,
) -> Result<()> {
let jmap = JMap::from_env(env, &jschema_metadata)?;
let schema_metadata = to_rust_map(env, &jmap)?;
let mut dataset_guard =
{ unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }? };
dataset_guard.replace_schema_metadata(schema_metadata)
}

#[no_mangle]
pub extern "system" fn Java_com_lancedb_lance_Dataset_nativeReplaceFieldMetadata(
mut env: JNIEnv,
java_dataset: JObject,
jfield_metadata_map: JObject,
) {
ok_or_throw_without_return!(
env,
inner_replace_field_metadata(&mut env, java_dataset, jfield_metadata_map)
)
}

fn inner_replace_field_metadata(
env: &mut JNIEnv,
java_dataset: JObject,
jfield_metadata_map: JObject,
) -> Result<()> {
let jmap = JMap::from_env(env, &jfield_metadata_map)?;
let mut field_metadata_map = HashMap::new();
let mut iter = jmap.iter(env)?;
env.with_local_frame(16, |env| {
while let Some((key, value)) = iter.next(env)? {
let field_id = env.call_method(&key, "intValue", "()I", &[])?.i()? as u32;
let inner_map = JMap::from_env(env, &value)?;
let value_map = to_rust_map(env, &inner_map)?;
field_metadata_map.insert(field_id, value_map);
}
Ok::<(), Error>(())
})?;
let mut dataset_guard =
{ unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }? };
dataset_guard.replace_field_metadata(field_metadata_map)
}
32 changes: 32 additions & 0 deletions java/core/src/main/java/com/lancedb/lance/Dataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,38 @@ public Tags tags() {
return new Tags();
}

/**
* Replace the schema metadata of the dataset.
*
* @param metadata the new table metadata
*/
public void replaceSchemaMetadata(Map<String, String> metadata) {
Comment thread
yanghua marked this conversation as resolved.
try (LockManager.WriteLock writeLock = lockManager.acquireWriteLock()) {
Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed");
nativeReplaceSchemaMetadata(metadata);
}
}

private native void nativeReplaceSchemaMetadata(Map<String, String> metadata);

/**
* Replace target field metadata of the dataset. This method won't affect fields not in the map
*
* @param fieldMetadataMap field id to metadata map
*/
public void replaceFieldMetadata(Map<Integer, Map<String, String>> fieldMetadataMap) {
try (LockManager.WriteLock writeLock = lockManager.acquireWriteLock()) {
Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed");
for (Integer fieldId : fieldMetadataMap.keySet()) {
Preconditions.checkArgument(fieldId >= 0, "Field id must be greater than 0");
}
nativeReplaceFieldMetadata(fieldMetadataMap);
}
}

private native void nativeReplaceFieldMetadata(
Map<Integer, Map<String, String>> fieldMetadataMap);

/** Tag operations of the dataset. */
public class Tags {

Expand Down
71 changes: 71 additions & 0 deletions java/core/src/test/java/com/lancedb/lance/DatasetTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import com.lancedb.lance.ipc.LanceScanner;
import com.lancedb.lance.schema.ColumnAlteration;
import com.lancedb.lance.schema.LanceField;
import com.lancedb.lance.schema.SqlExpressions;

import org.apache.arrow.c.ArrowArrayStream;
Expand Down Expand Up @@ -847,4 +848,74 @@ void testGetLanceSchema(@TempDir Path tempDir) {
assertEquals(testDataset.getSchema(), dataset.getLanceSchema().asArrowSchema());
}
}

@Test
void testReplaceSchemaMetadata(@TempDir Path tempDir) {
String testMethodName = new Object() {}.getClass().getEnclosingMethod().getName();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can just use void testXXX(@TempDir Path tempDir) { ...}

Copy link
Copy Markdown
Contributor Author

@majin1102 majin1102 Jul 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.

I think that would be better.

Do we need to use this pattern just in this PR or raise another PR to change the whole DatasetTest? Or just modify the whole DatasetTest in this PR. I used to keep PRs small, but not preferable.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added an issue tracking this, we can do it separately

String datasetPath = tempDir.resolve(testMethodName).toString();
try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
TestUtils.SimpleTestDataset testDataset =
new TestUtils.SimpleTestDataset(allocator, datasetPath);
dataset = testDataset.createEmptyDataset();
assertEquals(1, dataset.version());
Map<String, String> replaceMetadata = new HashMap<>();
replaceMetadata.put("key1", "value1");
replaceMetadata.put("key2", "value2");
dataset.replaceSchemaMetadata(replaceMetadata);
assertEquals(2, dataset.version());
Map<String, String> currentMetadata = dataset.getSchema().getCustomMetadata();
for (String configKey : currentMetadata.keySet()) {
assertEquals(currentMetadata.get(configKey), replaceMetadata.get(configKey));
}
assertEquals(replaceMetadata.size(), currentMetadata.size());

Map<String, String> replaceConfig2 = new HashMap<>();
replaceConfig2.put("key1", "value3");
dataset.replaceSchemaMetadata(replaceConfig2);
currentMetadata = dataset.getSchema().getCustomMetadata();
assertEquals(3, dataset.version());
assertEquals(1, currentMetadata.size());
assertEquals("value3", currentMetadata.get("key1"));
}
}

@Test
void testReplaceFieldConfig(@TempDir Path tempDir) {
String testMethodName = new Object() {}.getClass().getEnclosingMethod().getName();
String datasetPath = tempDir.resolve(testMethodName).toString();
try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
TestUtils.SimpleTestDataset testDataset =
new TestUtils.SimpleTestDataset(allocator, datasetPath);
dataset = testDataset.createEmptyDataset();
assertEquals(1, dataset.version());
LanceField field = dataset.getLanceSchema().fields().get(0);
Map<String, String> replaceMetadata = new HashMap<>();
replaceMetadata.put("key1", "value1");
replaceMetadata.put("key2", "value2");
dataset.replaceFieldMetadata(Collections.singletonMap(field.getId(), replaceMetadata));
assertEquals(2, dataset.version());
Map<String, String> currentMetadata = dataset.getSchema().getFields().get(0).getMetadata();
for (String configKey : currentMetadata.keySet()) {
assertEquals(currentMetadata.get(configKey), replaceMetadata.get(configKey));
}
assertEquals(replaceMetadata.size(), currentMetadata.size());

Map<String, String> replaceConfig2 = new HashMap<>();
Comment thread
jackye1995 marked this conversation as resolved.
replaceConfig2.put("key1", "value3");
dataset.replaceFieldMetadata(Collections.singletonMap(field.getId(), replaceConfig2));
currentMetadata = dataset.getSchema().getFields().get(0).getMetadata();
assertEquals(3, dataset.version());
assertEquals(1, currentMetadata.size());
assertEquals("value3", currentMetadata.get("key1"));

assertThrows(
IllegalArgumentException.class,
() ->
dataset.replaceFieldMetadata(
Collections.singletonMap(Integer.MAX_VALUE, replaceConfig2)));
assertThrows(
IllegalArgumentException.class,
() -> dataset.replaceFieldMetadata(Collections.singletonMap(-1, replaceConfig2)));
}
}
}
15 changes: 14 additions & 1 deletion rust/lance-table/src/format/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,22 @@ impl Manifest {
/// Replaces the metadata of the field with the given id with the given key-value pairs.
///
/// If the field does not exist in the schema, this is a no-op.
pub fn replace_field_metadata(&mut self, field_id: i32, new_metadata: HashMap<String, String>) {
pub fn replace_field_metadata(
&mut self,
field_id: i32,
new_metadata: HashMap<String, String>,
) -> Result<()> {
if let Some(field) = self.schema.field_by_id_mut(field_id) {
field.metadata = new_metadata;
Ok(())
} else {
Err(Error::invalid_input(
format!(
"Field with id {} does not exist for replace_field_metadata",
field_id
),
location!(),
))
}
}

Expand Down
2 changes: 1 addition & 1 deletion rust/lance/src/dataset/transaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1553,7 +1553,7 @@ impl Transaction {
}
if let Some(field_metadata) = field_metadata {
for (field_id, metadata) in field_metadata {
manifest.replace_field_metadata(*field_id as i32, metadata.clone());
manifest.replace_field_metadata(*field_id as i32, metadata.clone())?;
}
}
}
Expand Down
Loading