diff --git a/java/lance-jni/src/file_writer.rs b/java/lance-jni/src/file_writer.rs
index 57d6a2be570..600d7de2845 100644
--- a/java/lance-jni/src/file_writer.rs
+++ b/java/lance-jni/src/file_writer.rs
@@ -149,6 +149,42 @@ pub extern "system" fn Java_org_lance_file_LanceFileWriter_closeNative<'local>(
     JObject::null()
 }
 
+/// JNI entry point backing `LanceFileWriter.nativeAddSchemaMetadata`.
+///
+/// Delegates to `inner_add_schema_metadata`. If that fails, the error's `throw` method is
+/// invoked with the JNI environment. A null `JObject` is returned in either case.
+#[no_mangle]
+pub extern "system" fn Java_org_lance_file_LanceFileWriter_nativeAddSchemaMetadata<'local>(
+    mut env: JNIEnv<'local>,
+    writer: JObject,
+    schema_metadata: JObject, // Map<String, String>
+) -> JObject<'local> {
+    if let Err(e) = inner_add_schema_metadata(&mut env, writer, schema_metadata) {
+        e.throw(&mut env);
+    }
+    JObject::null()
+}
+
+/// Reads the Java map via `to_rust_map` and adds each key-value pair to the native writer's
+/// schema metadata.
+fn inner_add_schema_metadata(
+    env: &mut JNIEnv<'_>,
+    writer: JObject,
+    schema_metadata: JObject, // Map<String, String>
+) -> Result<()> {
+    let metadata_map = JMap::from_env(env, &schema_metadata)?;
+    let metadata = to_rust_map(env, &metadata_map)?;
+    // SAFETY: assumes the `NATIVE_WRITER` field was populated with a `BlockingFileWriter` when
+    // the Java writer was opened -- TODO confirm against the open path.
+    let writer_guard =
+        unsafe { env.get_rust_field::<_, _, BlockingFileWriter>(writer, NATIVE_WRITER) }?;
+    let mut writer = writer_guard.inner.lock().unwrap();
+    for (key, value) in metadata {
+        writer.add_schema_metadata(key, value);
+    }
+    Ok(())
+}
+
 #[no_mangle]
 pub extern "system" fn Java_org_lance_file_LanceFileWriter_writeNative<'local>(
     mut env: JNIEnv<'local>,
diff --git a/java/src/main/java/org/lance/file/LanceFileWriter.java b/java/src/main/java/org/lance/file/LanceFileWriter.java
index 1ca8b47324e..0143daee1ff 100644
--- a/java/src/main/java/org/lance/file/LanceFileWriter.java
+++ b/java/src/main/java/org/lance/file/LanceFileWriter.java
@@ -120,6 +120,23 @@ public void write(VectorSchemaRoot batch) throws IOException {
     }
   }
 
+  /**
+   * Adds schema metadata to the underlying file. The provided key-value pairs override existing
+   * entries with the same keys. Users can retrieve those values from the {@link
+   * LanceFileReader#schema() reader schema}.
+   *
+   * <p>Note that this method does not write the metadata to the underlying file immediately. The
+   * metadata is kept in an in-memory map and flushed to the file footer on close.
+   *
+   * @param metadata key-value pairs to add; null keys or values are rejected
+   * @throws IOException if the native layer reports an I/O failure
+   */
+  public void addSchemaMetadata(Map<String, String> metadata) throws IOException {
+    nativeAddSchemaMetadata(metadata);
+  }
+
+  private native void nativeAddSchemaMetadata(Map<String, String> metadata) throws IOException;
+
   /**
    * Close the LanceFileWriter
    *
diff --git a/java/src/test/java/org/lance/FileReaderWriterTest.java b/java/src/test/java/org/lance/FileReaderWriterTest.java
index 116c8689d75..c645acdcaa2 100644
--- a/java/src/test/java/org/lance/FileReaderWriterTest.java
+++ b/java/src/test/java/org/lance/FileReaderWriterTest.java
@@ -27,6 +27,7 @@ import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
 import org.apache.arrow.vector.util.Text;
 
+import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
 
@@ -258,4 +259,49 @@ void testInvalidPath() {
     } catch (IOException e) {
     }
   }
+
+  @Test
+  void testWriteSchemaMetadata(@TempDir Path tempDir) throws Exception {
+    String filePath = tempDir.resolve("write_schema_metadata.lance").toString();
+    BufferAllocator allocator = new RootAllocator();
+    try (LanceFileWriter writer = LanceFileWriter.open(filePath, allocator, null)) {
+      try (VectorSchemaRoot batch = createBatch(allocator)) {
+        writer.write(batch);
+        writer.addSchemaMetadata(Collections.singletonMap("testKey", "testValue"));
+        writer.write(batch);
+        // metadata can still be added after further batches have been written
+        writer.addSchemaMetadata(Collections.singletonMap("testKey1", "testValue1"));
+        // a later value for an existing key overrides the earlier one
+        writer.addSchemaMetadata(Collections.singletonMap("testKey", "newTestValue"));
+      }
+    }
+
+    try (LanceFileReader reader = LanceFileReader.open(filePath, allocator)) {
+      Schema fileSchema = reader.schema();
+      Map<String, String> metadata = fileSchema.getCustomMetadata();
+
+      Assertions.assertTrue(metadata.containsKey("testKey"));
+      Assertions.assertEquals("newTestValue", metadata.get("testKey"));
+
+      Assertions.assertTrue(metadata.containsKey("testKey1"));
+      Assertions.assertEquals("testValue1", metadata.get("testKey1"));
+    }
+  }
+
+  @Test
+  void testWriteNullSchemaMetadata(@TempDir Path tempDir) throws Exception {
+    String filePath = tempDir.resolve("write_null_schema_metadata.lance").toString();
+    BufferAllocator allocator = new RootAllocator();
+    try (LanceFileWriter writer = LanceFileWriter.open(filePath, allocator, null)) {
+      try (VectorSchemaRoot batch = createBatch(allocator)) {
+        writer.write(batch);
+        Assertions.assertThrows(
+            Exception.class,
+            () -> writer.addSchemaMetadata(Collections.singletonMap("someKey", null)));
+        Assertions.assertThrows(
+            Exception.class,
+            () -> writer.addSchemaMetadata(Collections.singletonMap(null, "someValue")));
+      }
+    }
+  }
 }