From 3e55652367c7ce2e9432a3825eb53f67f3e4a441 Mon Sep 17 00:00:00 2001 From: Roee Shlomo Date: Thu, 2 Sep 2021 15:05:49 +0300 Subject: [PATCH 01/21] ARROW-12965: [Java] C Data Interface implementation Add experimental support for C Data Interface Signed-off-by: roee88 Co-authored-by: Doron Chen --- java/ffi/CMakeLists.txt | 51 ++ java/ffi/README.md | 48 ++ java/ffi/pom.xml | 75 ++ java/ffi/src/main/cpp/abi.h | 103 +++ java/ffi/src/main/cpp/jni_wrapper.cc | 241 +++++++ .../org/apache/arrow/ffi/ArrayExporter.java | 149 ++++ .../org/apache/arrow/ffi/ArrayImporter.java | 154 ++++ .../java/org/apache/arrow/ffi/ArrowArray.java | 184 +++++ .../org/apache/arrow/ffi/ArrowSchema.java | 170 +++++ .../java/org/apache/arrow/ffi/BaseStruct.java | 43 ++ .../main/java/org/apache/arrow/ffi/FFI.java | 317 +++++++++ .../arrow/ffi/FFIDictionaryProvider.java | 72 ++ .../apache/arrow/ffi/FFIReferenceManager.java | 114 +++ .../main/java/org/apache/arrow/ffi/Flags.java | 51 ++ .../java/org/apache/arrow/ffi/Format.java | 334 +++++++++ .../java/org/apache/arrow/ffi/Metadata.java | 102 +++ .../java/org/apache/arrow/ffi/NativeUtil.java | 139 ++++ .../org/apache/arrow/ffi/SchemaExporter.java | 132 ++++ .../org/apache/arrow/ffi/SchemaImporter.java | 93 +++ .../org/apache/arrow/ffi/jni/JniLoader.java | 95 +++ .../org/apache/arrow/ffi/jni/JniWrapper.java | 41 ++ .../org/apache/arrow/ffi/jni/PrivateData.java | 31 + .../arrow/vector/StructVectorLoader.java | 143 ++++ .../arrow/vector/StructVectorUnloader.java | 107 +++ .../org/apache/arrow/ffi/DictionaryTest.java | 215 ++++++ .../java/org/apache/arrow/ffi/FlagsTest.java | 75 ++ .../java/org/apache/arrow/ffi/FormatTest.java | 143 ++++ .../org/apache/arrow/ffi/MetadataTest.java | 102 +++ .../org/apache/arrow/ffi/RoundtripTest.java | 665 ++++++++++++++++++ java/pom.xml | 8 + 30 files changed, 4197 insertions(+) create mode 100644 java/ffi/CMakeLists.txt create mode 100644 java/ffi/README.md create mode 100644 java/ffi/pom.xml create mode 100644 java/ffi/src/main/cpp/abi.h create mode 100644 java/ffi/src/main/cpp/jni_wrapper.cc create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/ArrayExporter.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/ArrowArray.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/ArrowSchema.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/BaseStruct.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/FFIDictionaryProvider.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/Flags.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/Format.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/Metadata.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/NativeUtil.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/SchemaExporter.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniLoader.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniWrapper.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/ffi/jni/PrivateData.java create mode 100644 java/ffi/src/main/java/org/apache/arrow/vector/StructVectorLoader.java create mode 
100644 java/ffi/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java create mode 100644 java/ffi/src/test/java/org/apache/arrow/ffi/DictionaryTest.java create mode 100644 java/ffi/src/test/java/org/apache/arrow/ffi/FlagsTest.java create mode 100644 java/ffi/src/test/java/org/apache/arrow/ffi/FormatTest.java create mode 100644 java/ffi/src/test/java/org/apache/arrow/ffi/MetadataTest.java create mode 100644 java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java diff --git a/java/ffi/CMakeLists.txt b/java/ffi/CMakeLists.txt new file mode 100644 index 00000000000..b45c82f48b7 --- /dev/null +++ b/java/ffi/CMakeLists.txt @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# arrow_ffi_java +# + +# Headers: top level +cmake_minimum_required(VERSION 3.11) +project(arrow_ffi_java) + +# Find java/jni +include(FindJava) +include(UseJava) +include(FindJNI) + +find_package(Java REQUIRED) +find_package(JNI REQUIRED) + +set(JNI_HEADERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated") + +include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} + ${JNI_INCLUDE_DIRS} ${JNI_HEADERS_DIR}) + +add_jar(${PROJECT_NAME} + src/main/java/org/apache/arrow/ffi/jni/JniLoader.java + src/main/java/org/apache/arrow/ffi/jni/JniWrapper.java + src/main/java/org/apache/arrow/ffi/jni/PrivateData.java + GENERATE_NATIVE_HEADERS + arrow_ffi_java-native + DESTINATION + ${JNI_HEADERS_DIR}) + +set(SOURCES src/main/cpp/jni_wrapper.cc) +add_library(arrow_ffi_jni SHARED ${SOURCES}) +target_link_libraries(arrow_ffi_jni ${JAVA_JVM_LIBRARY}) +add_dependencies(arrow_ffi_jni ${PROJECT_NAME}) diff --git a/java/ffi/README.md b/java/ffi/README.md new file mode 100644 index 00000000000..c0dfee544b5 --- /dev/null +++ b/java/ffi/README.md @@ -0,0 +1,48 @@ + + +# Java FFI (C Data Interface) + +## Setup Build Environment + +install: + - Java 8 or later + - Maven 3.3 or later + - A C++ compiler + - CMake 3.11 or later + - Make or ninja build utilities + +## Building JNI wrapper shared library + +``` +mkdir -p ./target/build/ +pushd ./target/build/ +cmake ../.. +make +popd +``` + +To use ninja, pass `-GNinja` when calling cmake and then use the `ninja` command instead of `make`. 
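+
+For example, the equivalent Ninja invocation (same directory layout as above) is:
+
+```
+mkdir -p ./target/build/
+pushd ./target/build/
+cmake -GNinja ../..
+ninja
+popd
+```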
+ +## Building and running tests + +``` +cd java +mvn -Parrow-ffi install +``` diff --git a/java/ffi/pom.xml b/java/ffi/pom.xml new file mode 100644 index 00000000000..fef334445ef --- /dev/null +++ b/java/ffi/pom.xml @@ -0,0 +1,75 @@ + + + + + arrow-java-root + org.apache.arrow + 6.0.0-SNAPSHOT + + 4.0.0 + + arrow-ffi + Arrow Java C Data Interface + Java implementation of C Data Interface + jar + + ./target/build + + + + + org.apache.arrow + arrow-vector + ${project.version} + compile + ${arrow.vector.classifier} + + + org.apache.arrow + arrow-vector + ${project.version} + test-jar + test + + + org.apache.arrow + arrow-memory-core + ${project.version} + compile + + + org.apache.arrow + arrow-memory-netty + ${project.version} + test + + + com.google.guava + guava + ${dep.guava.version} + test + + + + + + ${arrow.ffi.cpp.build.dir} + + **/libarrow_ffi_jni.* + + + + + + diff --git a/java/ffi/src/main/cpp/abi.h b/java/ffi/src/main/cpp/abi.h new file mode 100644 index 00000000000..ea7179c725a --- /dev/null +++ b/java/ffi/src/main/cpp/abi.h @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +// EXPERIMENTAL: C stream interface + +struct ArrowArrayStream { + // Callback to get the stream type + // (will be the same for all arrays in the stream). + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowSchema must be released independently from the stream. + int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); + + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowArray must be released independently from the stream. 
+ int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); + + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. + // + // Return value: pointer to a null-terminated character array describing + // the last error, or NULL if no description is available. + // + // The returned pointer is only valid until the next operation on this stream + // (including release). + const char* (*get_last_error)(struct ArrowArrayStream*); + + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. + void (*release)(struct ArrowArrayStream*); + + // Opaque producer-specific data + void* private_data; +}; + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/java/ffi/src/main/cpp/jni_wrapper.cc b/java/ffi/src/main/cpp/jni_wrapper.cc new file mode 100644 index 00000000000..c38e70b832f --- /dev/null +++ b/java/ffi/src/main/cpp/jni_wrapper.cc @@ -0,0 +1,241 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include +#include +#include "abi.h" +#include +#include +#include + +#include "org_apache_arrow_ffi_jni_JniWrapper.h" + +namespace +{ + + jclass CreateGlobalClassReference(JNIEnv *env, const char *class_name) + { + jclass local_class = env->FindClass(class_name); + jclass global_class = (jclass)env->NewGlobalRef(local_class); + env->DeleteLocalRef(local_class); + return global_class; + } + + jclass illegal_access_exception_class; + jclass illegal_argument_exception_class; + jclass runtime_exception_class; + jclass private_data_class; + + jmethodID private_data_close_method; + + jint JNI_VERSION = JNI_VERSION_1_6; + + class JniPendingException : public std::runtime_error + { + public: + explicit JniPendingException(const std::string &arg) : runtime_error(arg) {} + }; + + void ThrowPendingException(const std::string &message) + { + throw JniPendingException(message); + } + + void JniThrow(std::string message) { ThrowPendingException(message); } + + jmethodID GetMethodID(JNIEnv* env, jclass this_class, const char* name, + const char* sig) { + jmethodID ret = env->GetMethodID(this_class, name, sig); + if (ret == nullptr) { + std::string error_message = "Unable to find method " + std::string(name) + + " within signature" + std::string(sig); + ThrowPendingException(error_message); + } + return ret; + } + + class InnerPrivateData + { + public: + InnerPrivateData(JavaVM* vm, jobject private_data) + : vm_(vm), j_private_data_(private_data) {} + + JavaVM* vm_; + jobject j_private_data_; + }; + + template + void release_exported(T* base) { + // This should not be called on already released structure + assert(base->release != nullptr); + + // Release children + for (int64_t i = 0; i < base->n_children; ++i) { + T* child = base->children[i]; + if (child->release != nullptr) { + child->release(child); + assert(child->release == nullptr); + } + } + + // Release dictionary + T* dict = base->dictionary; + if (dict != nullptr && dict->release != nullptr) { + dict->release(dict); + assert(dict->release == nullptr); + } + + // Release all data directly owned by the struct + InnerPrivateData* private_data = reinterpret_cast(base->private_data); + JNIEnv* env; + if (private_data->vm_->GetEnv(reinterpret_cast(&env), JNI_VERSION) != JNI_OK) { + ThrowPendingException("JNIEnv was not attached to current thread"); + } + env->CallObjectMethod(private_data->j_private_data_, private_data_close_method); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + env->ExceptionClear(); + ThrowPendingException("Error calling close of private data"); + } + env->DeleteGlobalRef(private_data->j_private_data_); + delete private_data; + base->private_data = nullptr; + + // Mark released + base->release = nullptr; + } +} // namespace + +#define JNI_METHOD_START \ + try \ + { +// macro ended + +#define JNI_METHOD_END(fallback_expr) \ + } \ + catch (JniPendingException & e) \ + { \ + env->ThrowNew(runtime_exception_class, e.what()); \ + return fallback_expr; \ + } +// macro ended + +jint JNI_OnLoad(JavaVM *vm, void *reserved) +{ + JNIEnv *env; + if (vm->GetEnv(reinterpret_cast(&env), JNI_VERSION) != JNI_OK) + { + return JNI_ERR; + } + JNI_METHOD_START + illegal_access_exception_class = + CreateGlobalClassReference(env, "Ljava/lang/IllegalAccessException;"); + illegal_argument_exception_class = + CreateGlobalClassReference(env, "Ljava/lang/IllegalArgumentException;"); + runtime_exception_class = + CreateGlobalClassReference(env, "Ljava/lang/RuntimeException;"); + private_data_class = + 
CreateGlobalClassReference(env, "Lorg/apache/arrow/ffi/jni/PrivateData;"); + + private_data_close_method = + GetMethodID(env, private_data_class, "close", "()V"); + + return JNI_VERSION; + JNI_METHOD_END(JNI_ERR) +} + +void JNI_OnUnload(JavaVM *vm, void *reserved) +{ + JNIEnv *env; + vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); + env->DeleteGlobalRef(illegal_access_exception_class); + env->DeleteGlobalRef(illegal_argument_exception_class); + env->DeleteGlobalRef(runtime_exception_class); +} + +/* + * Class: org_apache_arrow_ffi_jni_JniWrapper + * Method: releaseSchema + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_releaseSchema( + JNIEnv *env, jobject, jlong address) { + JNI_METHOD_START + ArrowSchema* schema = reinterpret_cast(address); + if(schema->release != nullptr) { + schema->release(schema); + } + JNI_METHOD_END() +} + +/* + * Class: org_apache_arrow_ffi_jni_JniWrapper + * Method: releaseArray + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_releaseArray( + JNIEnv *env, jobject, jlong address) { + JNI_METHOD_START + ArrowArray* array = reinterpret_cast(address); + if(array->release != nullptr) { + array->release(array); + } + JNI_METHOD_END() +} + +/* + * Class: org_apache_arrow_ffi_jni_JniWrapper + * Method: exportSchema + * Signature: (JLorg/apache/arrow/ffi/jni/PrivateData;)V + */ +JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_exportSchema( + JNIEnv *env, jobject, jlong address, jobject private_data) { + JNI_METHOD_START + ArrowSchema* schema = reinterpret_cast(address); + + JavaVM* vm; + if (env->GetJavaVM(&vm) != JNI_OK) { + JniThrow("Unable to get JavaVM instance"); + } + jobject private_data_ref = env->NewGlobalRef(private_data); + + schema->private_data = new InnerPrivateData(vm, private_data_ref); + schema->release = &release_exported; + JNI_METHOD_END() +} + +/* + * Class: org_apache_arrow_ffi_jni_JniWrapper + * Method: exportArray + * Signature: (JLorg/apache/arrow/ffi/jni/PrivateData;)V + */ +JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_exportArray( + JNIEnv *env, jobject, jlong address, jobject private_data) { + JNI_METHOD_START + ArrowArray* array = reinterpret_cast(address); + + JavaVM* vm; + if (env->GetJavaVM(&vm) != JNI_OK) { + JniThrow("Unable to get JavaVM instance"); + } + jobject private_data_ref = env->NewGlobalRef(private_data); + + array->private_data = new InnerPrivateData(vm, private_data_ref); + array->release = &release_exported; + JNI_METHOD_END() +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayExporter.java b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayExporter.java new file mode 100644 index 00000000000..46e037443d0 --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayExporter.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +import static org.apache.arrow.ffi.NativeUtil.NULL; +import static org.apache.arrow.ffi.NativeUtil.addressOrNull; +import static org.apache.arrow.util.Preconditions.checkNotNull; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.arrow.ffi.jni.JniWrapper; +import org.apache.arrow.ffi.jni.PrivateData; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.dictionary.Dictionary; +import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; + +/** + * Exporter for {@link ArrowArray}. + */ +final class ArrayExporter { + private final BufferAllocator allocator; + + public ArrayExporter(BufferAllocator allocator) { + this.allocator = allocator; + } + + /** + * Private data structure for exported arrays. + */ + static class ExportedArrayPrivateData implements PrivateData { + ArrowBuf buffers_ptrs; + List buffers; + ArrowBuf children_ptrs; + List children; + ArrowArray dictionary; + + @Override + public void close() { + NativeUtil.closeBuffer(buffers_ptrs); + + if (buffers != null) { + for (ArrowBuf buffer : buffers) { + NativeUtil.closeBuffer(buffer); + } + } + NativeUtil.closeBuffer(children_ptrs); + + if (children != null) { + for (ArrowArray child : children) { + child.close(); + } + } + + if (dictionary != null) { + dictionary.close(); + } + } + } + + void export(ArrowArray array, FieldVector vector, DictionaryProvider dictionaryProvider) { + List children = vector.getChildrenFromFields(); + List buffers = vector.getFieldBuffers(); + int valueCount = vector.getValueCount(); + int nullCount = vector.getNullCount(); + DictionaryEncoding dictionaryEncoding = vector.getField().getDictionary(); + + ExportedArrayPrivateData data = new ExportedArrayPrivateData(); + try { + if (children != null) { + data.children = new ArrayList<>(children.size()); + data.children_ptrs = allocator.buffer((long) children.size() * Long.BYTES); + for (int i = 0; i < children.size(); i++) { + ArrowArray child = ArrowArray.allocateNew(allocator); + data.children.add(child); + data.children_ptrs.writeLong(child.memoryAddress()); + } + } + + if (buffers != null) { + data.buffers = new ArrayList<>(buffers.size()); + data.buffers_ptrs = allocator.buffer((long) buffers.size() * Long.BYTES); + for (ArrowBuf arrowBuf : buffers) { + if (arrowBuf != null) { + arrowBuf.getReferenceManager().retain(); + data.buffers_ptrs.writeLong(arrowBuf.memoryAddress()); + } else { + data.buffers_ptrs.writeLong(NULL); + } + data.buffers.add(arrowBuf); + } + } + + if (dictionaryEncoding != null) { + Dictionary dictionary = dictionaryProvider.lookup(dictionaryEncoding.getId()); + checkNotNull(dictionary, "Dictionary lookup failed on export of dictionary encoded array"); + + data.dictionary = ArrowArray.allocateNew(allocator); + FieldVector dictionaryVector = dictionary.getVector(); + export(data.dictionary, dictionaryVector, dictionaryProvider); + } + + ArrowArray.Snapshot snapshot = new ArrowArray.Snapshot(); + snapshot.length = valueCount; + snapshot.null_count = nullCount; + snapshot.offset = 0; + snapshot.n_buffers = (data.buffers != null) ? data.buffers.size() : 0; + snapshot.n_children = (data.children != null) ? 
data.children.size() : 0; + snapshot.buffers = addressOrNull(data.buffers_ptrs); + snapshot.children = addressOrNull(data.children_ptrs); + snapshot.dictionary = addressOrNull(data.dictionary); + snapshot.release = NULL; + array.save(snapshot); + + // sets release and private data + JniWrapper.get().exportArray(array.memoryAddress(), data); + } catch (Exception e) { + data.close(); + throw e; + } + + // Export children + if (children != null) { + for (int i = 0; i < children.size(); i++) { + FieldVector childVector = children.get(i); + ArrowArray child = data.children.get(i); + export(child, childVector, dictionaryProvider); + } + } + } +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java new file mode 100644 index 00000000000..f93b8b532f6 --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +import static org.apache.arrow.ffi.NativeUtil.NULL; +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; +import static org.apache.arrow.util.Preconditions.checkNotNull; +import static org.apache.arrow.util.Preconditions.checkState; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.ReferenceManager; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.TypeLayout; +import org.apache.arrow.vector.dictionary.Dictionary; +import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; + +/** + * Importer for {@link ArrowArray}. 
+ */ +final class ArrayImporter { + private static final int MAX_IMPORT_RECURSION_LEVEL = 64; + + private final BufferAllocator allocator; + private final FieldVector vector; + private final DictionaryProvider dictionaryProvider; + + private ReferenceManager referenceManager; + private int recursionLevel; + + ArrayImporter(BufferAllocator allocator, FieldVector vector, DictionaryProvider dictionaryProvider) { + this.allocator = allocator; + this.vector = vector; + this.dictionaryProvider = dictionaryProvider; + } + + void importArray(ArrowArray src) { + ArrowArray.Snapshot snapshot = src.snapshot(); + checkState(snapshot.release != NULL, "Cannot import released ArrowArray"); + + // Move imported array + ArrowArray ownedArray = ArrowArray.allocateNew(allocator); + ownedArray.save(snapshot); + src.markReleased(); + src.close(); + + recursionLevel = 0; + + // This keeps the array alive as long as there are any buffers that need it + referenceManager = new FFIReferenceManager(ownedArray); + try { + referenceManager.retain(); + doImport(snapshot); + } finally { + referenceManager.release(); + } + + } + + private void importChild(ArrayImporter parent, ArrowArray src) { + ArrowArray.Snapshot snapshot = src.snapshot(); + checkState(snapshot.release != NULL, "Cannot import released ArrowArray"); + recursionLevel = parent.recursionLevel + 1; + checkState(recursionLevel < MAX_IMPORT_RECURSION_LEVEL, "Recursion level in ArrowArray struct exceeded"); + // Child buffers will keep the entire parent import alive. + // Perhaps we can move the child structs on import, + // but that is another level of complication. + referenceManager = parent.referenceManager; + doImport(snapshot); + } + + private void doImport(ArrowArray.Snapshot snapshot) { + // First import children (required for reconstituting parent array data) + long[] children = NativeUtil.toJavaArray(snapshot.children, checkedCastToInt(snapshot.n_children)); + if (children != null) { + List childVectors = vector.getChildrenFromFields(); + checkState(children.length == childVectors.size(), "ArrowArray struct has %s children (expected %s)", + children.length, childVectors.size()); + for (int i = 0; i < children.length; i++) { + checkState(children[i] != NULL, "ArrowArray struct has NULL child at position %s", i); + ArrayImporter childImporter = new ArrayImporter(allocator, childVectors.get(i), dictionaryProvider); + childImporter.importChild(this, ArrowArray.wrap(children[i])); + } + } + + // Handle import of a dictionary encoded vector + if (snapshot.dictionary != NULL) { + DictionaryEncoding encoding = vector.getField().getDictionary(); + checkNotNull(encoding, "Missing encoding on import of ArrowArray with dictionary"); + + Dictionary dictionary = dictionaryProvider.lookup(encoding.getId()); + checkNotNull(dictionary, "Dictionary lookup failed on import of ArrowArray with dictionary"); + + // reset the dictionary vector to the initial state + dictionary.getVector().clear(); + + ArrayImporter dictionaryImporter = new ArrayImporter(allocator, dictionary.getVector(), dictionaryProvider); + dictionaryImporter.importChild(this, ArrowArray.wrap(snapshot.dictionary)); + } + + // Import main data + ArrowFieldNode fieldNode = new ArrowFieldNode(snapshot.length, snapshot.null_count); + List buffers = importBuffers(snapshot); + try { + vector.loadFieldBuffers(fieldNode, buffers); + } catch (RuntimeException e) { + throw new IllegalArgumentException( + "Could not load buffers for field " + vector.getField() + ". 
error message: " + e.getMessage(), e); + } + } + + private List importBuffers(ArrowArray.Snapshot snapshot) { + long[] buffers = NativeUtil.toJavaArray(snapshot.buffers, checkedCastToInt(snapshot.n_buffers)); + if (buffers == null) { + return new ArrayList<>(); + } + + int buffersCount = TypeLayout.getTypeBufferCount(vector.getField().getType()); + checkState(buffers.length == buffersCount, "Expected %d buffers for imported type %s, ArrowArray struct has %d", + buffersCount, vector.getField().getType().getTypeID(), buffers.length); + + List result = new ArrayList<>(buffersCount); + for (long bufferPtr : buffers) { + ArrowBuf buffer = null; + if (bufferPtr != NULL) { + // TODO(roee88): an API for getting the size for each buffer is not yet + // available + buffer = new ArrowBuf(referenceManager, null, Integer.MAX_VALUE, bufferPtr); + } + result.add(buffer); + } + return result; + } +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrowArray.java b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrowArray.java new file mode 100644 index 00000000000..87189827273 --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrowArray.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +import static org.apache.arrow.ffi.NativeUtil.NULL; +import static org.apache.arrow.util.Preconditions.checkNotNull; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import org.apache.arrow.ffi.jni.JniWrapper; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.ReferenceManager; +import org.apache.arrow.memory.util.MemoryUtil; + +/** + * C Data Interface ArrowArray. + *

+ * Represents a wrapper for the following C structure:
+ *
+ * <pre>
+ * struct ArrowArray {
+ *     // Array data description
+ *     int64_t length;
+ *     int64_t null_count;
+ *     int64_t offset;
+ *     int64_t n_buffers;
+ *     int64_t n_children;
+ *     const void** buffers;
+ *     struct ArrowArray** children;
+ *     struct ArrowArray* dictionary;
+ * 
+ *     // Release callback
+ *     void (*release)(struct ArrowArray*);
+ *     // Opaque producer-specific data
+ *     void* private_data;
+ * };
+ * </pre>
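+ *
+ * <p>
+ * A minimal allocation sketch; {@code allocator} is assumed to be an existing
+ * {@code BufferAllocator}, and the producer call is purely illustrative:
+ *
+ * <pre>
+ * try (ArrowArray array = ArrowArray.allocateNew(allocator)) {
+ *   fillFromNativeProducer(array.memoryAddress()); // hypothetical native producer
+ *   // pass the filled struct to FFI.importVector / FFI.importIntoVector
+ * }
+ * </pre>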
+ */ +public class ArrowArray implements BaseStruct { + private static final int SIZE_OF = 80; + private static final int INDEX_RELEASE_CALLBACK = 64; + + private ArrowBuf data; + + /** + * Snapshot of the ArrowArray raw data. + */ + public static class Snapshot { + public long length; + public long null_count; + public long offset; + public long n_buffers; + public long n_children; + public long buffers; + public long children; + public long dictionary; + public long release; + public long private_data; + + /** + * Initialize empty ArrowArray snapshot. + */ + public Snapshot() { + length = NULL; + null_count = NULL; + offset = NULL; + n_buffers = NULL; + n_children = NULL; + buffers = NULL; + children = NULL; + dictionary = NULL; + release = NULL; + private_data = NULL; + } + } + + /** + * Create ArrowArray from an existing memory address. + *

+ * The resulting ArrowArray does not own the memory. + * + * @param memoryAddress Memory address to wrap + * @return A new ArrowArray instance + */ + public static ArrowArray wrap(long memoryAddress) { + return new ArrowArray(new ArrowBuf(ReferenceManager.NO_OP, null, ArrowArray.SIZE_OF, memoryAddress)); + } + + /** + * Create ArrowArray by allocating memory. + *

+ * The resulting ArrowArray owns the memory. + * + * @param allocator Allocator for memory allocations + * @return A new ArrowArray instance + */ + public static ArrowArray allocateNew(BufferAllocator allocator) { + ArrowArray array = new ArrowArray(allocator.buffer(ArrowArray.SIZE_OF)); + array.markReleased(); + return array; + } + + ArrowArray(ArrowBuf data) { + checkNotNull(data, "ArrowArray initialized with a null buffer"); + this.data = data; + } + + /** + * Mark the array as released. + */ + public void markReleased() { + directBuffer().putLong(INDEX_RELEASE_CALLBACK, NULL); + } + + @Override + public long memoryAddress() { + checkNotNull(data, "ArrowArray is already closed"); + return data.memoryAddress(); + } + + @Override + public void release() { + long address = memoryAddress(); + JniWrapper.get().releaseArray(address); + } + + @Override + public void close() { + if (data != null) { + data.close(); + data = null; + } + } + + private ByteBuffer directBuffer() { + return MemoryUtil.directBuffer(memoryAddress(), ArrowArray.SIZE_OF).order(ByteOrder.nativeOrder()); + } + + /** + * Take a snapshot of the ArrowArray raw values. + * @return snapshot + */ + public Snapshot snapshot() { + ByteBuffer data = directBuffer(); + Snapshot snapshot = new Snapshot(); + snapshot.length = data.getLong(); + snapshot.null_count = data.getLong(); + snapshot.offset = data.getLong(); + snapshot.n_buffers = data.getLong(); + snapshot.n_children = data.getLong(); + snapshot.buffers = data.getLong(); + snapshot.children = data.getLong(); + snapshot.dictionary = data.getLong(); + snapshot.release = data.getLong(); + snapshot.private_data = data.getLong(); + return snapshot; + } + + /** + * Write values from Snapshot to the underlying ArrowArray memory buffer. + */ + public void save(Snapshot snapshot) { + directBuffer().putLong(snapshot.length).putLong(snapshot.null_count).putLong(snapshot.offset) + .putLong(snapshot.n_buffers).putLong(snapshot.n_children).putLong(snapshot.buffers).putLong(snapshot.children) + .putLong(snapshot.dictionary).putLong(snapshot.release).putLong(snapshot.private_data); + } +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrowSchema.java b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrowSchema.java new file mode 100644 index 00000000000..a9a7d4d506e --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrowSchema.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.ffi; + +import static org.apache.arrow.ffi.NativeUtil.NULL; +import static org.apache.arrow.util.Preconditions.checkNotNull; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import org.apache.arrow.ffi.jni.JniWrapper; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.ReferenceManager; +import org.apache.arrow.memory.util.MemoryUtil; + +/** + * C Data Interface ArrowSchema. + *

+ * Represents a wrapper for the following C structure:
+ *
+ * <pre>
+ * struct ArrowSchema {
+ *     // Array type description
+ *     const char* format;
+ *     const char* name;
+ *     const char* metadata;
+ *     int64_t flags;
+ *     int64_t n_children;
+ *     struct ArrowSchema** children;
+ *     struct ArrowSchema* dictionary;
+ *      
+ *     // Release callback
+ *     void (*release)(struct ArrowSchema*);
+ *     // Opaque producer-specific data
+ *     void* private_data; 
+ * };
+ * </pre>
+ */ +public class ArrowSchema implements BaseStruct { + private static final int SIZE_OF = 72; + + private ArrowBuf data; + + /** + * Snapshot of the ArrowSchema raw data. + */ + public static class Snapshot { + public long format; + public long name; + public long metadata; + public long flags; + public long n_children; + public long children; + public long dictionary; + public long release; + public long private_data; + + /** + * Initialize empty ArrowSchema snapshot. + */ + public Snapshot() { + format = NULL; + name = NULL; + metadata = NULL; + flags = NULL; + n_children = NULL; + children = NULL; + dictionary = NULL; + release = NULL; + private_data = NULL; + } + } + + /** + * Create ArrowSchema from an existing memory address. + *

+ * The resulting ArrowSchema does not own the memory. + * + * @param memoryAddress Memory address to wrap + * @return A new ArrowSchema instance + */ + public static ArrowSchema wrap(long memoryAddress) { + return new ArrowSchema(new ArrowBuf(ReferenceManager.NO_OP, null, ArrowSchema.SIZE_OF, memoryAddress)); + } + + /** + * Create ArrowSchema by allocating memory. + *

+ * The resulting ArrowSchema owns the memory. + * + * @param allocator Allocator for memory allocations + * @return A new ArrowSchema instance + */ + public static ArrowSchema allocateNew(BufferAllocator allocator) { + return new ArrowSchema(allocator.buffer(ArrowSchema.SIZE_OF)); + } + + ArrowSchema(ArrowBuf data) { + checkNotNull(data, "ArrowSchema initialized with a null buffer"); + this.data = data; + } + + @Override + public long memoryAddress() { + checkNotNull(data, "ArrowSchema is already closed"); + return data.memoryAddress(); + } + + @Override + public void release() { + long address = memoryAddress(); + JniWrapper.get().releaseSchema(address); + } + + @Override + public void close() { + if (data != null) { + data.close(); + data = null; + } + } + + private ByteBuffer directBuffer() { + return MemoryUtil.directBuffer(memoryAddress(), ArrowSchema.SIZE_OF).order(ByteOrder.nativeOrder()); + } + + /** + * Take a snapshot of the ArrowSchema raw values. + * @return snapshot + */ + public Snapshot snapshot() { + ByteBuffer data = directBuffer(); + Snapshot snapshot = new Snapshot(); + snapshot.format = data.getLong(); + snapshot.name = data.getLong(); + snapshot.metadata = data.getLong(); + snapshot.flags = data.getLong(); + snapshot.n_children = data.getLong(); + snapshot.children = data.getLong(); + snapshot.dictionary = data.getLong(); + snapshot.release = data.getLong(); + snapshot.private_data = data.getLong(); + return snapshot; + } + + /** + * Write values from Snapshot to the underlying ArrowSchema memory buffer. + */ + public void save(Snapshot snapshot) { + directBuffer().putLong(snapshot.format).putLong(snapshot.name).putLong(snapshot.metadata).putLong(snapshot.flags) + .putLong(snapshot.n_children).putLong(snapshot.children).putLong(snapshot.dictionary) + .putLong(snapshot.release).putLong(snapshot.private_data); + } +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/BaseStruct.java b/java/ffi/src/main/java/org/apache/arrow/ffi/BaseStruct.java new file mode 100644 index 00000000000..c23e12f978c --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/BaseStruct.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +/** + * Base interface for C Data Interface structures. + */ +public interface BaseStruct extends AutoCloseable { + /** + * Get memory address. + * + * @return Memory address + */ + long memoryAddress(); + + /** + * Call the release callback of an ArrowArray. + *

+ * This function must not be called for child arrays. + */ + void release(); + + /** + * Close to release the main buffer. + */ + @Override + void close(); +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java b/java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java new file mode 100644 index 00000000000..8e835b8dd6d --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java @@ -0,0 +1,317 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.StructVectorLoader; +import org.apache.arrow.vector.StructVectorUnloader; +import org.apache.arrow.vector.VectorLoader; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.VectorUnloader; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; + +/** + * Functions for working with the C data interface. + * This API is EXPERIMENTAL. + */ +public final class FFI { + + private FFI() { + } + + /** + * Export Java Field using the C data interface format. + * + * @param allocator Buffer allocator for allocating C data interface fields + * @param field Field object to export + * @param provider Dictionary provider for dictionary encoded fields (optional) + * @param out C struct where to export the field + */ + public static void exportField(BufferAllocator allocator, Field field, DictionaryProvider provider, ArrowSchema out) { + SchemaExporter exporter = new SchemaExporter(allocator); + exporter.export(out, field, provider); + } + + /** + * Export Java Schema using the C data interface format. 
+ * + * @param allocator Buffer allocator for allocating C data interface fields + * @param schema Schema object to export + * @param provider Dictionary provider for dictionary encoded fields (optional) + * @param out C struct where to export the field + */ + public static void exportSchema(BufferAllocator allocator, Schema schema, DictionaryProvider provider, + ArrowSchema out) { + // Convert to a struct field equivalent to the input schema + FieldType fieldType = new FieldType(false, new ArrowType.Struct(), null, schema.getCustomMetadata()); + Field field = new Field("", fieldType, schema.getFields()); + exportField(allocator, field, provider, out); + } + + /** + * Export Java FieldVector using the C data interface format. + *

+ * The resulting ArrowArray struct keeps the array data and buffers alive until + * its release callback is called by the consumer. + * + * @param allocator Buffer allocator for allocating C data interface fields + * @param vector Vector object to export + * @param provider Dictionary provider for dictionary encoded vectors + * (optional) + * @param out C struct where to export the array + */ + public static void exportVector(BufferAllocator allocator, FieldVector vector, DictionaryProvider provider, + ArrowArray out) { + exportVector(allocator, vector, provider, out, null); + } + + /** + * Export Java FieldVector using the C data interface format. + *

+ * The resulting ArrowArray struct keeps the array data and buffers alive until + * its release callback is called by the consumer. + * + * @param allocator Buffer allocator for allocating C data interface fields + * @param vector Vector object to export + * @param provider Dictionary provider for dictionary encoded vectors + * (optional) + * @param out C struct where to export the array + * @param outSchema Optional C struct where to export the array type + */ + public static void exportVector(BufferAllocator allocator, FieldVector vector, DictionaryProvider provider, + ArrowArray out, ArrowSchema outSchema) { + if (outSchema != null) { + exportField(allocator, vector.getField(), provider, outSchema); + } + + ArrayExporter exporter = new ArrayExporter(allocator); + exporter.export(out, vector, provider); + } + + /** + * Export the current contents of a Java VectorSchemaRoot using the C data + * interface format. + *

+ * The vector schema root is exported as if it were a struct array. The + * resulting ArrowArray struct keeps the record batch data and buffers alive + * until its release callback is called by the consumer. + * + * @param allocator Buffer allocator for allocating C data interface fields + * @param vsr Vector schema root to export + * @param provider Dictionary provider for dictionary encoded vectors + * (optional) + * @param out C struct where to export the record batch + */ + public static void exportVectorSchemaRoot(BufferAllocator allocator, VectorSchemaRoot vsr, + DictionaryProvider provider, ArrowArray out) { + exportVectorSchemaRoot(allocator, vsr, provider, out, null); + } + + /** + * Export the current contents of a Java VectorSchemaRoot using the C data + * interface format. + *

+ * The vector schema root is exported as if it were a struct array. The + * resulting ArrowArray struct keeps the record batch data and buffers alive + * until its release callback is called by the consumer. + * + * @param allocator Buffer allocator for allocating C data interface fields + * @param vsr Vector schema root to export + * @param provider Dictionary provider for dictionary encoded vectors + * (optional) + * @param out C struct where to export the record batch + * @param outSchema Optional C struct where to export the record batch schema + */ + public static void exportVectorSchemaRoot(BufferAllocator allocator, VectorSchemaRoot vsr, + DictionaryProvider provider, ArrowArray out, ArrowSchema outSchema) { + if (outSchema != null) { + exportSchema(allocator, vsr.getSchema(), provider, outSchema); + } + + VectorUnloader unloader = new VectorUnloader(vsr); + try (ArrowRecordBatch recordBatch = unloader.getRecordBatch()) { + StructVectorLoader loader = new StructVectorLoader(vsr.getSchema()); + try (StructVector vector = loader.load(allocator, recordBatch)) { + exportVector(allocator, vector, provider, out); + } + } + } + + /** + * Import Java Field from the C data interface. + *

+ * The given ArrowSchema struct is released (as per the C data interface + * specification), even if this function fails. + * + * @param allocator Buffer allocator for allocating dictionary vectors + * @param schema C data interface struct representing the field [inout] + * @param provider A dictionary provider will be initialized with empty + * dictionary vectors (optional) + * @return Imported field object + */ + public static Field importField(BufferAllocator allocator, ArrowSchema schema, FFIDictionaryProvider provider) { + try { + SchemaImporter importer = new SchemaImporter(allocator); + return importer.importField(schema, provider); + } finally { + schema.release(); + schema.close(); + } + } + + /** + * Import Java Schema from the C data interface. + *

+ * The given ArrowSchema struct is released (as per the C data interface + * specification), even if this function fails. + * + * @param allocator Buffer allocator for allocating dictionary vectors + * @param schema C data interface struct representing the field + * @param provider A dictionary provider will be initialized with empty + * dictionary vectors (optional) + * @return Imported schema object + */ + public static Schema importSchema(BufferAllocator allocator, ArrowSchema schema, FFIDictionaryProvider provider) { + Field structField = importField(allocator, schema, provider); + if (structField.getType().getTypeID() != ArrowTypeID.Struct) { + throw new IllegalArgumentException("Cannot import schema: ArrowSchema describes non-struct type"); + } + return new Schema(structField.getChildren(), structField.getMetadata()); + } + + /** + * Import Java vector from the C data interface. + *

+ * The ArrowArray struct has its contents moved (as per the C data interface + * specification) to a private object held alive by the resulting array. + * + * @param allocator Buffer allocator + * @param array C data interface struct holding the array data + * @param vector Imported vector object [out] + * @param provider Dictionary provider to load dictionary vectors to (optional) + */ + public static void importIntoVector(BufferAllocator allocator, ArrowArray array, FieldVector vector, + DictionaryProvider provider) { + ArrayImporter importer = new ArrayImporter(allocator, vector, provider); + importer.importArray(array); + } + + /** + * Import Java vector and its type from the C data interface. + *

+ * The ArrowArray struct has its contents moved (as per the C data interface + * specification) to a private object held alive by the resulting vector. The + * ArrowSchema struct is released, even if this function fails. + * + * @param allocator Buffer allocator for allocating the output FieldVector + * @param array C data interface struct holding the array data + * @param schema C data interface struct holding the array type + * @param provider Dictionary provider to load dictionary vectors to (optional) + * @return Imported vector object + */ + public static FieldVector importVector(BufferAllocator allocator, ArrowArray array, ArrowSchema schema, + FFIDictionaryProvider provider) { + Field field = importField(allocator, schema, provider); + FieldVector vector = field.createVector(allocator); + importIntoVector(allocator, array, vector, provider); + return vector; + } + + /** + * Import record batch from the C data interface into vector schema root. + * + * The ArrowArray struct has its contents moved (as per the C data interface + * specification) to a private object held alive by the resulting vector schema + * root. + * + * The schema of the vector schema root must match the input array (undefined + * behavior otherwise). + * + * @param allocator Buffer allocator + * @param array C data interface struct holding the record batch data + * @param root vector schema root to load into + * @param provider Dictionary provider to load dictionary vectors to (optional) + */ + public static void importIntoVectorSchemaRoot(BufferAllocator allocator, ArrowArray array, VectorSchemaRoot root, + DictionaryProvider provider) { + try (StructVector structVector = StructVector.empty("", allocator)) { + for (Field field : root.getSchema().getFields()) { + structVector.addOrGet(field.getName(), field.getFieldType(), FieldVector.class); + } + importIntoVector(allocator, array, structVector, provider); + StructVectorUnloader unloader = new StructVectorUnloader(structVector); + VectorLoader loader = new VectorLoader(root); + try (ArrowRecordBatch recordBatch = unloader.getRecordBatch()) { + loader.load(recordBatch); + } + } + } + + /** + * Import Java vector schema root from a C data interface Schema. + * + * The type represented by the ArrowSchema struct must be a struct type array. + * + * The ArrowSchema struct is released, even if this function fails. + * + * @param allocator Buffer allocator for allocating the output VectorSchemaRoot + * @param schema C data interface struct holding the record batch schema + * @param provider Dictionary provider to load dictionary vectors to (optional) + * @return Imported vector schema root + */ + public static VectorSchemaRoot importVectorSchemaRoot(BufferAllocator allocator, ArrowSchema schema, + FFIDictionaryProvider provider) { + return importVectorSchemaRoot(allocator, schema, null, provider); + } + + /** + * Import Java vector schema root from the C data interface. + * + * The type represented by the ArrowSchema struct must be a struct type array. + * + * The ArrowArray struct has its contents moved (as per the C data interface + * specification) to a private object held alive by the resulting record batch. + * The ArrowSchema struct is released, even if this function fails. + * + * Prefer {@link #importIntoVectorSchemaRoot} for loading array data while + * reusing the same vector schema root. 
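+ *
+ * <p>
+ * A minimal import sketch; {@code allocator} is assumed to be an existing
+ * {@code BufferAllocator} and {@code fillFromNativeProducer} stands in for
+ * whatever producer populates both structs through their memory addresses:
+ *
+ * <pre>
+ * try (ArrowSchema schema = ArrowSchema.allocateNew(allocator);
+ *      ArrowArray array = ArrowArray.allocateNew(allocator);
+ *      FFIDictionaryProvider provider = new FFIDictionaryProvider()) {
+ *   fillFromNativeProducer(schema.memoryAddress(), array.memoryAddress());
+ *   try (VectorSchemaRoot root = FFI.importVectorSchemaRoot(allocator, schema, array, provider)) {
+ *     // consume root; keep provider open while dictionary-encoded vectors are in use
+ *   }
+ * }
+ * </pre>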
+ * + * @param allocator Buffer allocator for allocating the output VectorSchemaRoot + * @param schema C data interface struct holding the record batch schema + * @param array Optional C data interface struct holding the record batch + * data + * @param provider Dictionary provider to load dictionary vectors to (optional) + * @return Imported vector schema root + */ + public static VectorSchemaRoot importVectorSchemaRoot(BufferAllocator allocator, ArrowSchema schema, ArrowArray array, + FFIDictionaryProvider provider) { + VectorSchemaRoot vsr = VectorSchemaRoot.create(importSchema(allocator, schema, provider), allocator); + if (array != null) { + importIntoVectorSchemaRoot(allocator, array, vsr, provider); + } + return vsr; + } +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIDictionaryProvider.java b/java/ffi/src/main/java/org/apache/arrow/ffi/FFIDictionaryProvider.java new file mode 100644 index 00000000000..c65fc90b3cf --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/FFIDictionaryProvider.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.apache.arrow.vector.dictionary.Dictionary; +import org.apache.arrow.vector.dictionary.DictionaryProvider; + +/** + * A DictionaryProvider that is used in FFI for imports. + *

+ * FFIDictionaryProvider is similar to + * {@link DictionaryProvider.MapDictionaryProvider} with a key difference that + * the dictionaries are owned by the provider so it must eventually be closed. + *

+ * The typical usage is to create the FFIDictionaryProvider and pass it to + * {@link FFI#importField} or {@link FFI#importSchema} to allocate empty + * dictionaries based on the information in {@link ArrowSchema}. Then you can + * re-use the same dictionary provider in any function that imports an + * {@link ArrowArray} that has the same schema. + */ +public class FFIDictionaryProvider implements DictionaryProvider, AutoCloseable { + + private final Map map; + + public FFIDictionaryProvider() { + this.map = new HashMap<>(); + } + + void put(Dictionary dictionary) { + Dictionary previous = map.put(dictionary.getEncoding().getId(), dictionary); + if (previous != null) { + previous.getVector().close(); + } + } + + public final Set getDictionaryIds() { + return map.keySet(); + } + + @Override + public Dictionary lookup(long id) { + return map.get(id); + } + + @Override + public void close() { + for (Dictionary dictionary : map.values()) { + dictionary.getVector().close(); + } + map.clear(); + } + +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java b/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java new file mode 100644 index 00000000000..c52b73b378c --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.OwnershipTransferResult; +import org.apache.arrow.memory.ReferenceManager; +import org.apache.arrow.util.Preconditions; + +/** + * A ReferenceManager implementation that holds a {@link org.apache.arrow.ffi.BaseStruct}. + *

+ * A reference count is maintained and once it reaches zero the struct + * is released (as per the C data interface specification) and closed. + */ +final class FFIReferenceManager implements ReferenceManager { + private final AtomicInteger bufRefCnt = new AtomicInteger(0); + + private final BaseStruct struct; + + FFIReferenceManager(BaseStruct struct) { + this.struct = struct; + } + + @Override + public int getRefCount() { + return bufRefCnt.get(); + } + + @Override + public boolean release() { + return release(1); + } + + @Override + public boolean release(int decrement) { + Preconditions.checkState(decrement >= 1, "ref count decrement should be greater than or equal to 1"); + // decrement the ref count + final int refCnt; + synchronized (this) { + refCnt = bufRefCnt.addAndGet(-decrement); + if (refCnt == 0) { + // refcount of this reference manager has dropped to 0 + // release the underlying memory + struct.release(); + struct.close(); + } + } + // the new ref count should be >= 0 + Preconditions.checkState(refCnt >= 0, "RefCnt has gone negative"); + return refCnt == 0; + } + + @Override + public void retain() { + retain(1); + } + + @Override + public void retain(int increment) { + Preconditions.checkArgument(increment > 0, "retain(%s) argument is not positive", increment); + bufRefCnt.addAndGet(increment); + } + + @Override + public ArrowBuf retain(ArrowBuf srcBuffer, BufferAllocator targetAllocator) { + retain(); + return srcBuffer; + } + + @Override + public ArrowBuf deriveBuffer(ArrowBuf sourceBuffer, long index, long length) { + final long derivedBufferAddress = sourceBuffer.memoryAddress() + index; + return new ArrowBuf(this, null, length, derivedBufferAddress); + } + + @Override + public OwnershipTransferResult transferOwnership(ArrowBuf sourceBuffer, BufferAllocator targetAllocator) { + throw new UnsupportedOperationException(); + } + + @Override + public BufferAllocator getAllocator() { + return null; + } + + @Override + public long getSize() { + return 0L; + } + + @Override + public long getAccountedSize() { + return 0L; + } +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/Flags.java b/java/ffi/src/main/java/org/apache/arrow/ffi/Flags.java new file mode 100644 index 00000000000..c41ce771bcc --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/Flags.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; +import org.apache.arrow.vector.types.pojo.Field; + +/** + * Flags as defined in the C data interface specification. 
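A lifetime sketch for the reference manager implemented above, usable only inside `org.apache.arrow.ffi` since both classes are package-private. The wrapped address is hypothetical (it would normally be filled by a producer), and `ArrowArray` is assumed to be one of the `BaseStruct` implementations handled by the importer.

```java
ArrowArray array = ArrowArray.wrap(addressFilledByProducer);  // hypothetical address
FFIReferenceManager manager = new FFIReferenceManager(array);
manager.retain();   // 0 -> 1: an imported buffer now keeps the struct alive
manager.release();  // 1 -> 0: array.release() runs the producer's release callback, then close()
```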
+ */ +final class Flags { + static final int ARROW_FLAG_DICTIONARY_ORDERED = 1; + static final int ARROW_FLAG_NULLABLE = 2; + static final int ARROW_FLAG_MAP_KEYS_SORTED = 4; + + private Flags() { + } + + static long forField(Field field) { + long flags = 0L; + if (field.isNullable()) { + flags |= ARROW_FLAG_NULLABLE; + } + if (field.getDictionary() != null && field.getDictionary().isOrdered()) { + flags |= ARROW_FLAG_DICTIONARY_ORDERED; + } + if (field.getType().getTypeID() == ArrowTypeID.Map) { + ArrowType.Map map = (ArrowType.Map) field.getType(); + if (map.getKeysSorted()) { + flags |= ARROW_FLAG_MAP_KEYS_SORTED; + } + } + return flags; + } +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/Format.java b/java/ffi/src/main/java/org/apache/arrow/ffi/Format.java new file mode 100644 index 00000000000..cdc312c47f5 --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/Format.java @@ -0,0 +1,334 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +import java.util.Arrays; +import java.util.stream.Collectors; + +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.types.DateUnit; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.IntervalUnit; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.UnionMode; +import org.apache.arrow.vector.types.pojo.ArrowType; + +/** + * Conversion between {@link ArrowType} and string formats, as per C data + * interface specification. 
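For example, a nullable field backed by an ordered dictionary combines two of the flag bits defined above (the class is package-private, so this only compiles inside `org.apache.arrow.ffi`); the same expectation appears in `FlagsTest` later in this patch.

```java
FieldType fieldType = new FieldType(/*nullable=*/true, ArrowType.Binary.INSTANCE,
    new DictionaryEncoding(123L, /*ordered=*/true, new ArrowType.Int(8, true)));
long flags = Flags.forField(new Field("name", fieldType, new ArrayList<>()));
// flags == ARROW_FLAG_NULLABLE | ARROW_FLAG_DICTIONARY_ORDERED == 2 | 1 == 3
```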
+ */ +final class Format { + + private Format() { + } + + static String asString(ArrowType arrowType) { + switch (arrowType.getTypeID()) { + case Binary: + return "z"; + case Bool: + return "b"; + case Date: { + ArrowType.Date type = (ArrowType.Date) arrowType; + switch (type.getUnit()) { + case DAY: + return "tdD"; + case MILLISECOND: + return "tdm"; + default: + throw new UnsupportedOperationException( + String.format("Date type with unit %s is unsupported", type.getUnit())); + } + } + case Decimal: { + ArrowType.Decimal type = (ArrowType.Decimal) arrowType; + if (type.getBitWidth() == 128) { + return String.format("d:%d,%d", type.getPrecision(), type.getScale()); + } + return String.format("d:%d,%d,%d", type.getPrecision(), type.getScale(), type.getBitWidth()); + } + case Duration: { + ArrowType.Duration type = (ArrowType.Duration) arrowType; + switch (type.getUnit()) { + case SECOND: + return "tDs"; + case MILLISECOND: + return "tDm"; + case MICROSECOND: + return "tDu"; + case NANOSECOND: + return "tDn"; + default: + throw new UnsupportedOperationException( + String.format("Duration type with unit %s is unsupported", type.getUnit())); + } + } + case FixedSizeBinary: { + ArrowType.FixedSizeBinary type = (ArrowType.FixedSizeBinary) arrowType; + return String.format("w:%d", type.getByteWidth()); + } + case FixedSizeList: { + ArrowType.FixedSizeList type = (ArrowType.FixedSizeList) arrowType; + return String.format("+w:%d", type.getListSize()); + } + case FloatingPoint: { + ArrowType.FloatingPoint type = (ArrowType.FloatingPoint) arrowType; + switch (type.getPrecision()) { + case HALF: + return "e"; + case SINGLE: + return "f"; + case DOUBLE: + return "g"; + default: + throw new UnsupportedOperationException( + String.format("FloatingPoint type with precision %s is unsupported", type.getPrecision())); + } + } + case Int: { + String format; + ArrowType.Int type = (ArrowType.Int) arrowType; + switch (type.getBitWidth()) { + case Byte.SIZE: + format = "C"; + break; + case Short.SIZE: + format = "S"; + break; + case Integer.SIZE: + format = "I"; + break; + case Long.SIZE: + format = "L"; + break; + default: + throw new UnsupportedOperationException( + String.format("Int type with bitwidth %d is unsupported", type.getBitWidth())); + } + if (type.getIsSigned()) { + format = format.toLowerCase(); + } + return format; + } + case Interval: { + ArrowType.Interval type = (ArrowType.Interval) arrowType; + switch (type.getUnit()) { + case DAY_TIME: + return "tiD"; + case YEAR_MONTH: + return "tiM"; + default: + throw new UnsupportedOperationException( + String.format("Interval type with unit %s is unsupported", type.getUnit())); + } + } + case LargeBinary: + return "Z"; + case LargeList: + return "+L"; + case LargeUtf8: + return "U"; + case List: + return "+l"; + case Map: + return "+m"; + case Null: + return "n"; + case Struct: + return "+s"; + case Time: { + ArrowType.Time type = (ArrowType.Time) arrowType; + if (type.getUnit() == TimeUnit.SECOND && type.getBitWidth() == 32) { + return "tts"; + } else if (type.getUnit() == TimeUnit.MILLISECOND && type.getBitWidth() == 32) { + return "ttm"; + } else if (type.getUnit() == TimeUnit.MICROSECOND && type.getBitWidth() == 64) { + return "ttu"; + } else if (type.getUnit() == TimeUnit.NANOSECOND && type.getBitWidth() == 64) { + return "ttn"; + } else { + throw new UnsupportedOperationException(String.format("Time type with unit %s and bitwidth %d is unsupported", + type.getUnit(), type.getBitWidth())); + } + } + case Timestamp: { + String format; + 
ArrowType.Timestamp type = (ArrowType.Timestamp) arrowType; + switch (type.getUnit()) { + case SECOND: + format = "tss"; + break; + case MILLISECOND: + format = "tsm"; + break; + case MICROSECOND: + format = "tsu"; + break; + case NANOSECOND: + format = "tsn"; + break; + default: + throw new UnsupportedOperationException( + String.format("Timestamp type with unit %s is unsupported", type.getUnit())); + } + String timezone = type.getTimezone(); + return String.format("%s:%s", format, timezone == null ? "" : timezone); + } + case Union: + ArrowType.Union type = (ArrowType.Union) arrowType; + String typeIDs = Arrays.stream(type.getTypeIds()).mapToObj(String::valueOf).collect(Collectors.joining(",")); + switch (type.getMode()) { + case Dense: + return String.format("+ud:%s", typeIDs); + case Sparse: + return String.format("+us:%s", typeIDs); + default: + throw new UnsupportedOperationException( + String.format("Union type with mode %s is unsupported", type.getMode())); + } + case Utf8: + return "u"; + case NONE: + throw new IllegalArgumentException("Arrow type ID is NONE"); + default: + throw new UnsupportedOperationException(String.format("Unknown type id %s", arrowType.getTypeID())); + } + } + + static ArrowType asType(String format, long flags) + throws NumberFormatException, UnsupportedOperationException, IllegalStateException { + switch (format) { + case "n": + return new ArrowType.Null(); + case "b": + return new ArrowType.Bool(); + case "c": + return new ArrowType.Int(8, true); + case "C": + return new ArrowType.Int(8, false); + case "s": + return new ArrowType.Int(16, true); + case "S": + return new ArrowType.Int(16, false); + case "i": + return new ArrowType.Int(32, true); + case "I": + return new ArrowType.Int(32, false); + case "l": + return new ArrowType.Int(64, true); + case "L": + return new ArrowType.Int(64, false); + case "e": + return new ArrowType.FloatingPoint(FloatingPointPrecision.HALF); + case "f": + return new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE); + case "g": + return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); + case "z": + return new ArrowType.Binary(); + case "Z": + return new ArrowType.LargeBinary(); + case "u": + return new ArrowType.Utf8(); + case "U": + return new ArrowType.LargeUtf8(); + case "tdD": + return new ArrowType.Date(DateUnit.DAY); + case "tdm": + return new ArrowType.Date(DateUnit.MILLISECOND); + case "tts": + return new ArrowType.Time(TimeUnit.SECOND, 32); + case "ttm": + return new ArrowType.Time(TimeUnit.MILLISECOND, 32); + case "ttu": + return new ArrowType.Time(TimeUnit.MICROSECOND, 64); + case "ttn": + return new ArrowType.Time(TimeUnit.NANOSECOND, 64); + case "tDs": + return new ArrowType.Duration(TimeUnit.SECOND); + case "tDm": + return new ArrowType.Duration(TimeUnit.MILLISECOND); + case "tDu": + return new ArrowType.Duration(TimeUnit.MICROSECOND); + case "tDn": + return new ArrowType.Duration(TimeUnit.NANOSECOND); + case "tiM": + return new ArrowType.Interval(IntervalUnit.YEAR_MONTH); + case "tiD": + return new ArrowType.Interval(IntervalUnit.DAY_TIME); + case "+l": + return new ArrowType.List(); + case "+L": + return new ArrowType.LargeList(); + case "+s": + return new ArrowType.Struct(); + case "+m": + boolean keysSorted = (flags & Flags.ARROW_FLAG_MAP_KEYS_SORTED) != 0; + return new ArrowType.Map(keysSorted); + default: + String[] parts = format.split(":", 2); + if (parts.length == 2) { + return parseComplexFormat(parts[0], parts[1]); + } + throw new UnsupportedOperationException(String.format("Format %s 
is not supported", format)); + } + } + + private static ArrowType parseComplexFormat(String format, String payload) + throws NumberFormatException, UnsupportedOperationException, IllegalStateException { + switch (format) { + case "d": { + int[] parts = payloadToIntArray(payload); + Preconditions.checkState(parts.length == 2 || parts.length == 3, "Format %s:%s is illegal", format, payload); + int precision = parts[0]; + int scale = parts[1]; + Integer bitWidth = (parts.length == 3) ? parts[2] : null; + return ArrowType.Decimal.createDecimal(precision, scale, bitWidth); + } + case "w": + return new ArrowType.FixedSizeBinary(Integer.parseInt(payload)); + case "+w": + return new ArrowType.FixedSizeList(Integer.parseInt(payload)); + case "+ud": + return new ArrowType.Union(UnionMode.Dense, payloadToIntArray(payload)); + case "+us": + return new ArrowType.Union(UnionMode.Sparse, payloadToIntArray(payload)); + case "tss": + return new ArrowType.Timestamp(TimeUnit.SECOND, payloadToTimezone(payload)); + case "tsm": + return new ArrowType.Timestamp(TimeUnit.MILLISECOND, payloadToTimezone(payload)); + case "tsu": + return new ArrowType.Timestamp(TimeUnit.MICROSECOND, payloadToTimezone(payload)); + case "tsn": + return new ArrowType.Timestamp(TimeUnit.NANOSECOND, payloadToTimezone(payload)); + default: + throw new UnsupportedOperationException(String.format("Format %s:%s is not supported", format, payload)); + } + } + + private static int[] payloadToIntArray(String payload) throws NumberFormatException { + return Arrays.stream(payload.split(",")).mapToInt(Integer::parseInt).toArray(); + } + + private static String payloadToTimezone(String payload) { + if (payload.isEmpty()) { + return null; + } + return payload; + } +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/Metadata.java b/java/ffi/src/main/java/org/apache/arrow/ffi/Metadata.java new file mode 100644 index 00000000000..e928336eb2f --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/Metadata.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +import static org.apache.arrow.ffi.NativeUtil.NULL; +import static org.apache.arrow.util.Preconditions.checkState; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.MemoryUtil; + +/** + * Encode and decode metadata. 
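A few concrete mappings produced by the conversion above (package-private, so usable from `org.apache.arrow.ffi`); the strings follow the C data interface format specification.

```java
Format.asString(new ArrowType.Decimal(10, 2, 128));               // "d:10,2"
Format.asString(new ArrowType.Timestamp(TimeUnit.SECOND, "UTC")); // "tss:UTC"
Format.asString(new ArrowType.Int(32, /*signed=*/true));          // "i"
ArrowType decimal = Format.asType("d:10,2", /*flags=*/0L);        // decimal(10, 2), default bit width
ArrowType sortedMap = Format.asType("+m", Flags.ARROW_FLAG_MAP_KEYS_SORTED); // Map with sorted keys
```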
+ */ +final class Metadata { + + private Metadata() { + } + + static ArrowBuf encode(BufferAllocator allocator, Map metadata) { + if (metadata == null || metadata.size() == 0) { + return null; + } + + List buffers = new ArrayList<>(metadata.size() * 2); + int totalSize = 4 + metadata.size() * 8; // number of key/value pairs + buffer length fields + for (Map.Entry entry : metadata.entrySet()) { + byte[] keyBuffer = entry.getKey().getBytes(StandardCharsets.UTF_8); + byte[] valueBuffer = entry.getValue().getBytes(StandardCharsets.UTF_8); + totalSize += keyBuffer.length; + totalSize += valueBuffer.length; + buffers.add(keyBuffer); + buffers.add(valueBuffer); + } + + ArrowBuf result = allocator.buffer(totalSize); + ByteBuffer writer = MemoryUtil.directBuffer(result.memoryAddress(), totalSize).order(ByteOrder.nativeOrder()); + writer.putInt(metadata.size()); + for (byte[] buffer : buffers) { + writer.putInt(buffer.length); + writer.put(buffer); + } + return result.slice(0, totalSize); + } + + static Map decode(long bufferAddress) { + if (bufferAddress == NULL) { + return null; + } + + ByteBuffer reader = MemoryUtil.directBuffer(bufferAddress, Integer.MAX_VALUE).order(ByteOrder.nativeOrder()); + + int size = reader.getInt(); + checkState(size >= 0, "Metadata size must not be negative"); + if (size == 0) { + return null; + } + + Map result = new HashMap<>(size); + for (int i = 0; i < size; i++) { + String key = readString(reader); + String value = readString(reader); + result.put(key, value); + } + return result; + } + + private static String readString(ByteBuffer reader) { + int length = reader.getInt(); + checkState(length >= 0, "Metadata item length must not be negative"); + String result = ""; + if (length > 0) { + byte[] dst = new byte[length]; + reader.get(dst); + result = new String(dst, StandardCharsets.UTF_8); + } + return result; + } +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/NativeUtil.java b/java/ffi/src/main/java/org/apache/arrow/ffi/NativeUtil.java new file mode 100644 index 00000000000..51e097dafb5 --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/NativeUtil.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.MemoryUtil; + +/** + * Utility functions for working with native memory. + */ +public final class NativeUtil { + public static final byte NULL = 0; + static final int MAX_STRING_LENGTH = Short.MAX_VALUE; + + private NativeUtil() { + } + + /** + * Convert a pointer to a null terminated string into a Java String. 
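A short sketch of the layout implemented above: a native-order int32 pair count followed by a length-prefixed UTF-8 key and value for each entry. Both methods are package-private, and `allocator` is assumed to be an open `BufferAllocator`.

```java
Map<String, String> original = new HashMap<>();
original.put("key", "value");
ArrowBuf encoded = Metadata.encode(allocator, original);          // 4 + 2 * 4 + key/value bytes
Map<String, String> decoded = Metadata.decode(encoded.memoryAddress());
// decoded holds the same single entry as original
encoded.close();
```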
+ * + * @param cstringPtr pointer to C string + * @return Converted string + */ + public static String toJavaString(long cstringPtr) { + if (cstringPtr == NULL) { + return null; + } + ByteBuffer reader = MemoryUtil.directBuffer(cstringPtr, MAX_STRING_LENGTH).order(ByteOrder.nativeOrder()); + + int length = 0; + while (reader.get() != NULL) { + length++; + } + byte[] bytes = new byte[length]; + ((ByteBuffer) reader.rewind()).get(bytes); + return new String(bytes, 0, length, StandardCharsets.UTF_8); + } + + /** + * Convert a native array pointer (void**) to Java array of pointers. + * + * @param arrayPtr Array pointer + * @param size Array size + * @return Array of pointer values as longs + */ + public static long[] toJavaArray(long arrayPtr, int size) { + if (size == 0 || arrayPtr == NULL) { + return null; + } + if (size < 0) { + throw new IllegalArgumentException("Invalid native array size"); + } + + long[] result = new long[size]; + ByteBuffer reader = MemoryUtil.directBuffer(arrayPtr, Long.BYTES * size).order(ByteOrder.nativeOrder()); + for (int i = 0; i < size; i++) { + result[i] = reader.getLong(); + } + return result; + } + + /** + * Convert Java string to a null terminated string. + * + * @param allocator Buffer allocator for allocating the native string + * @param string Input String to convert + * @return Buffer with a null terminated string or null if the input is null + */ + public static ArrowBuf toNativeString(BufferAllocator allocator, String string) { + if (string == null) { + return null; + } + + byte[] bytes = string.getBytes(StandardCharsets.UTF_8); + ArrowBuf buffer = allocator.buffer(bytes.length + 1); + buffer.writeBytes(bytes); + buffer.writeByte(NULL); + return buffer; + } + + /** + * Close a buffer if it's not null. + * + * @param buf Buffer to close + */ + public static void closeBuffer(ArrowBuf buf) { + if (buf != null) { + buf.close(); + } + } + + /** + * Get the address of a buffer or {@value #NULL} if the input buffer is null. + * + * @param buf Buffer to get the address of + * @return Memory addresss or {@value #NULL} + */ + public static long addressOrNull(ArrowBuf buf) { + if (buf == null) { + return NULL; + } + return buf.memoryAddress(); + } + + /** + * Get the address of a C Data Interface struct or {@value #NULL} if the input + * struct is null. + * + * @param struct C Data Interface struct to get the address of + * @return Memory addresss or {@value #NULL} + */ + public static long addressOrNull(BaseStruct struct) { + if (struct == null) { + return NULL; + } + return struct.memoryAddress(); + } + +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaExporter.java b/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaExporter.java new file mode 100644 index 00000000000..8b11a121fbb --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaExporter.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
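For instance, a Java string can be round-tripped through a NUL-terminated native string with the helpers above.

```java
try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
     ArrowBuf nativeName = NativeUtil.toNativeString(allocator, "column_a")) {
  long address = NativeUtil.addressOrNull(nativeName);  // non-zero, since nativeName is not null
  String back = NativeUtil.toJavaString(address);       // "column_a"
}
```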
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +import static org.apache.arrow.ffi.NativeUtil.NULL; +import static org.apache.arrow.ffi.NativeUtil.addressOrNull; +import static org.apache.arrow.util.Preconditions.checkNotNull; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.arrow.ffi.jni.JniWrapper; +import org.apache.arrow.ffi.jni.PrivateData; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.dictionary.Dictionary; +import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.Field; + +/** + * Exporter for {@link ArrowSchema}. + */ +final class SchemaExporter { + private final BufferAllocator allocator; + + public SchemaExporter(BufferAllocator allocator) { + this.allocator = allocator; + } + + /** + * Private data structure for exported schemas. + */ + static class ExportedSchemaPrivateData implements PrivateData { + ArrowBuf format; + ArrowBuf name; + ArrowBuf metadata; + ArrowBuf children_ptrs; + ArrowSchema dictionary; + List children; + + @Override + public void close() { + NativeUtil.closeBuffer(format); + NativeUtil.closeBuffer(name); + NativeUtil.closeBuffer(metadata); + NativeUtil.closeBuffer(children_ptrs); + if (dictionary != null) { + dictionary.close(); + } + if (children != null) { + for (ArrowSchema child : children) { + child.close(); + } + } + } + } + + void export(ArrowSchema schema, Field field, DictionaryProvider dictionaryProvider) { + String name = field.getName(); + String format = Format.asString(field.getType()); + long flags = Flags.forField(field); + List children = field.getChildren(); + DictionaryEncoding dictionaryEncoding = field.getDictionary(); + + ExportedSchemaPrivateData data = new ExportedSchemaPrivateData(); + try { + data.format = NativeUtil.toNativeString(allocator, format); + data.name = NativeUtil.toNativeString(allocator, name); + data.metadata = Metadata.encode(allocator, field.getMetadata()); + + if (children != null) { + data.children = new ArrayList<>(children.size()); + data.children_ptrs = allocator.buffer((long) children.size() * Long.BYTES); + for (int i = 0; i < children.size(); i++) { + ArrowSchema child = ArrowSchema.allocateNew(allocator); + data.children.add(child); + data.children_ptrs.writeLong(child.memoryAddress()); + } + } + + if (dictionaryEncoding != null) { + Dictionary dictionary = dictionaryProvider.lookup(dictionaryEncoding.getId()); + checkNotNull(dictionary, "Dictionary lookup failed on export of field with dictionary"); + + data.dictionary = ArrowSchema.allocateNew(allocator); + export(data.dictionary, dictionary.getVector().getField(), dictionaryProvider); + } + + ArrowSchema.Snapshot snapshot = new ArrowSchema.Snapshot(); + snapshot.format = data.format.memoryAddress(); + snapshot.name = addressOrNull(data.name); + snapshot.metadata = addressOrNull(data.metadata); + snapshot.flags = flags; + snapshot.n_children = (data.children != null) ? 
data.children.size() : 0; + snapshot.children = addressOrNull(data.children_ptrs); + snapshot.dictionary = addressOrNull(data.dictionary); + snapshot.release = NULL; + schema.save(snapshot); + + // sets release and private data + JniWrapper.get().exportSchema(schema.memoryAddress(), data); + } catch (Exception e) { + data.close(); + throw e; + } + + // Export children + if (children != null) { + for (int i = 0; i < children.size(); i++) { + Field childField = children.get(i); + ArrowSchema child = data.children.get(i); + export(child, childField, dictionaryProvider); + } + } + } +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java b/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java new file mode 100644 index 00000000000..89b09379fd2 --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +import static org.apache.arrow.ffi.NativeUtil.NULL; +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; +import static org.apache.arrow.util.Preconditions.checkNotNull; +import static org.apache.arrow.util.Preconditions.checkState; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.dictionary.Dictionary; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; + +/** + * Importer for {@link ArrowSchema}. 
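This exporter backs the producer-side `FFI.exportSchema` entry point. A minimal sketch, assuming the consumer has handed over the address of an `ArrowSchema` struct it allocated (`consumerSchemaAddress`), and that `allocator`, `root` (a `VectorSchemaRoot`) and `dictionaryProvider` are already available.

```java
try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerSchemaAddress)) {
  FFI.exportSchema(allocator, root.getSchema(), dictionaryProvider, arrowSchema);
}
```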
+ */ +final class SchemaImporter { + private static final int MAX_IMPORT_RECURSION_LEVEL = 64; + private long nextDictionaryID = 1L; + + private final BufferAllocator allocator; + + public SchemaImporter(BufferAllocator allocator) { + this.allocator = allocator; + } + + Field importField(ArrowSchema schema, FFIDictionaryProvider provider) { + return importField(schema, provider, 0); + } + + private Field importField(ArrowSchema schema, FFIDictionaryProvider provider, + int recursionLevel) { + checkState(recursionLevel < MAX_IMPORT_RECURSION_LEVEL, "Recursion level in ArrowSchema struct exceeded"); + + ArrowSchema.Snapshot snapshot = schema.snapshot(); + checkState(snapshot.release != NULL, "Cannot import released ArrowSchema"); + + String name = NativeUtil.toJavaString(snapshot.name); + String format = NativeUtil.toJavaString(snapshot.format); + checkNotNull(format, "format field must not be null"); + ArrowType arrowType = Format.asType(format, snapshot.flags); + boolean nullable = (snapshot.flags & Flags.ARROW_FLAG_NULLABLE) != 0; + Map metadata = Metadata.decode(snapshot.metadata); + + // Handle dictionary encoded vectors + DictionaryEncoding dictionaryEncoding = null; + if (snapshot.dictionary != NULL && provider != null) { + boolean ordered = (snapshot.flags & Flags.ARROW_FLAG_DICTIONARY_ORDERED) != 0; + ArrowType.Int indexType = (ArrowType.Int) arrowType; + dictionaryEncoding = new DictionaryEncoding(nextDictionaryID++, ordered, indexType); + + ArrowSchema dictionarySchema = ArrowSchema.wrap(snapshot.dictionary); + Field dictionaryField = importField(dictionarySchema, provider, recursionLevel + 1); + provider.put(new Dictionary(dictionaryField.createVector(allocator), dictionaryEncoding)); + } + + FieldType fieldType = new FieldType(nullable, arrowType, dictionaryEncoding, metadata); + + List children = null; + long[] childrenIds = NativeUtil.toJavaArray(snapshot.children, checkedCastToInt(snapshot.n_children)); + if (childrenIds != null) { + children = new ArrayList<>(childrenIds.length); + for (long childAddress : childrenIds) { + ArrowSchema childSchema = ArrowSchema.wrap(childAddress); + Field field = importField(childSchema, provider, recursionLevel + 1); + children.add(field); + } + } + return new Field(name, fieldType, children); + } +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniLoader.java b/java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniLoader.java new file mode 100644 index 00000000000..893b736cc5a --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniLoader.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.ffi.jni; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * The JniLoader for C Data Interface API's native implementation. + */ +public class JniLoader { + private static final JniLoader INSTANCE = new JniLoader(Collections.singletonList("arrow_ffi_jni")); + + public static JniLoader get() { + return INSTANCE; + } + + private final Set librariesToLoad; + + private JniLoader(List libraryNames) { + librariesToLoad = new HashSet<>(libraryNames); + } + + private boolean finished() { + return librariesToLoad.isEmpty(); + } + + /** + * If required JNI libraries are not loaded, then load them. + */ + public void ensureLoaded() { + if (finished()) { + return; + } + loadRemaining(); + } + + private synchronized void loadRemaining() { + // The method is protected by a mutex via synchronized, if more than one thread + // race to call + // loadRemaining, at same time only one will do the actual loading and the + // others will wait for + // the mutex to be acquired then check on the remaining list: if there are + // libraries that were not + // successfully loaded then the mutex owner will try to load them again. + if (finished()) { + return; + } + List libs = new ArrayList<>(librariesToLoad); + for (String lib : libs) { + load(lib); + librariesToLoad.remove(lib); + } + } + + private void load(String name) { + final String libraryToLoad = System.mapLibraryName(name); + try { + File temp = File.createTempFile("jnilib-", ".tmp", new File(System.getProperty("java.io.tmpdir"))); + try (final InputStream is = JniWrapper.class.getClassLoader().getResourceAsStream(libraryToLoad)) { + if (is == null) { + throw new FileNotFoundException(libraryToLoad); + } + Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); + System.load(temp.getAbsolutePath()); + } + } catch (IOException e) { + throw new IllegalStateException("error loading native libraries: " + e); + } + } +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniWrapper.java b/java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniWrapper.java new file mode 100644 index 00000000000..a32cc3b6a3f --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniWrapper.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi.jni; + +/** + * JniWrapper for C Data Interface API implementation. 
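The loader runs automatically from the `JniWrapper` singleton declared below, but it can also be invoked explicitly (for example at application startup) so that packaging problems surface early.

```java
JniLoader.get().ensureLoaded(); // extracts and loads the arrow_ffi_jni library from the classpath if needed
```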
+ */ +public class JniWrapper { + private static final JniWrapper INSTANCE = new JniWrapper(); + + public static JniWrapper get() { + return INSTANCE; + } + + private JniWrapper() { + JniLoader.get().ensureLoaded(); + } + + public native void releaseSchema(long memoryAddress); + + public native void releaseArray(long memoryAddress); + + public native void exportSchema(long memoryAddress, PrivateData privateData); + + public native void exportArray(long memoryAddress, PrivateData data); +} diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/jni/PrivateData.java b/java/ffi/src/main/java/org/apache/arrow/ffi/jni/PrivateData.java new file mode 100644 index 00000000000..7a9a68dba94 --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/jni/PrivateData.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi.jni; + +import java.io.Closeable; + +/** + * Interface for Java objects stored in C data interface private data. + *

+ * This interface is used for exported structures. + */ +public interface PrivateData extends Closeable { + + @Override + void close(); +} diff --git a/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorLoader.java b/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorLoader.java new file mode 100644 index 00000000000..e92d2246c6a --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorLoader.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import static org.apache.arrow.util.Preconditions.checkArgument; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.util.Collections2; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.compression.CompressionCodec; +import org.apache.arrow.vector.compression.CompressionUtil; +import org.apache.arrow.vector.compression.NoCompressionCodec; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; + +/** + * Loads buffers into {@link StructVector}. + */ +public class StructVectorLoader { + + private final Schema schema; + private final CompressionCodec.Factory factory; + + /** + * A flag indicating if decompression is needed. This will affect the behavior + * of releasing buffers. + */ + private boolean decompressionNeeded; + + /** + * Construct with a schema. + * + * @param schema buffers are added based on schema. + */ + public StructVectorLoader(Schema schema) { + this(schema, NoCompressionCodec.Factory.INSTANCE); + } + + /** + * Construct with a schema and a compression codec factory. + * + * @param schema buffers are added based on schema. + * @param factory the factory to create codec. + */ + public StructVectorLoader(Schema schema, CompressionCodec.Factory factory) { + this.schema = schema; + this.factory = factory; + } + + /** + * Loads the record batch into the struct vector. + * + *

+ * This will not close the record batch. + * + * @param recordBatch the batch to load + */ + public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch) { + StructVector result = StructVector.empty("", allocator); + + Iterator buffers = recordBatch.getBuffers().iterator(); + Iterator nodes = recordBatch.getNodes().iterator(); + CompressionUtil.CodecType codecType = CompressionUtil.CodecType + .fromCompressionType(recordBatch.getBodyCompression().getCodec()); + decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; + CompressionCodec codec = decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; + for (Field field : this.schema.getFields()) { + FieldVector fieldVector = result.addOrGet(field.getName(), field.getFieldType(), FieldVector.class); + loadBuffers(fieldVector, field, buffers, nodes, codec); + } + result.loadFieldBuffers(new ArrowFieldNode(recordBatch.getLength(), 0), Collections.singletonList(null)); + if (nodes.hasNext() || buffers.hasNext()) { + throw new IllegalArgumentException("not all nodes and buffers were consumed. nodes: " + + Collections2.toList(nodes).toString() + " buffers: " + Collections2.toList(buffers).toString()); + } + return result; + } + + private void loadBuffers(FieldVector vector, Field field, Iterator buffers, Iterator nodes, + CompressionCodec codec) { + checkArgument(nodes.hasNext(), "no more field nodes for for field %s and vector %s", field, vector); + ArrowFieldNode fieldNode = nodes.next(); + int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType()); + List ownBuffers = new ArrayList<>(bufferLayoutCount); + for (int j = 0; j < bufferLayoutCount; j++) { + ArrowBuf nextBuf = buffers.next(); + // for vectors without nulls, the buffer is empty, so there is no need to + // decompress it. + ArrowBuf bufferToAdd = nextBuf.writerIndex() > 0 ? codec.decompress(vector.getAllocator(), nextBuf) : nextBuf; + ownBuffers.add(bufferToAdd); + if (decompressionNeeded) { + // decompression performed + nextBuf.getReferenceManager().retain(); + } + } + try { + vector.loadFieldBuffers(fieldNode, ownBuffers); + if (decompressionNeeded) { + for (ArrowBuf buf : ownBuffers) { + buf.close(); + } + } + } catch (RuntimeException e) { + throw new IllegalArgumentException( + "Could not load buffers for field " + field + ". error message: " + e.getMessage(), e); + } + List children = field.getChildren(); + if (children.size() > 0) { + List childrenFromFields = vector.getChildrenFromFields(); + checkArgument(children.size() == childrenFromFields.size(), + "should have as many children as in the schema: found %s expected %s", childrenFromFields.size(), + children.size()); + for (int i = 0; i < childrenFromFields.size(); i++) { + Field child = children.get(i); + FieldVector fieldVector = childrenFromFields.get(i); + loadBuffers(fieldVector, child, buffers, nodes, codec); + } + } + } +} diff --git a/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java b/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java new file mode 100644 index 00000000000..8196359e86c --- /dev/null +++ b/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
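Together with the `StructVectorUnloader` defined next, the loader above lets a struct vector be round-tripped through an `ArrowRecordBatch`. A sketch, assuming an already-populated `StructVector` named `structVector` and an open `allocator`.

```java
Schema childSchema = new Schema(structVector.getField().getChildren());
StructVectorUnloader unloader = new StructVectorUnloader(structVector);
try (ArrowRecordBatch recordBatch = unloader.getRecordBatch();
     StructVector copy = new StructVectorLoader(childSchema).load(allocator, recordBatch)) {
  // copy holds the same children and row count as structVector
}
```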
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.compression.CompressionCodec; +import org.apache.arrow.vector.compression.CompressionUtil; +import org.apache.arrow.vector.compression.NoCompressionCodec; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; + +/** + * Helper class that handles converting a {@link StructVector} + * to a {@link ArrowRecordBatch}. + */ +public class StructVectorUnloader { + + private final StructVector root; + private final boolean includeNullCount; + private final CompressionCodec codec; + private final boolean alignBuffers; + + /** + * Constructs a new instance of the given struct vector. + */ + public StructVectorUnloader(StructVector root) { + this(root, true, NoCompressionCodec.INSTANCE, true); + } + + /** + * Constructs a new instance. + * + * @param root The struct vector to serialize to an {@link ArrowRecordBatch}. + * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch} + * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries. + */ + public StructVectorUnloader( + StructVector root, boolean includeNullCount, boolean alignBuffers) { + this(root, includeNullCount, NoCompressionCodec.INSTANCE, alignBuffers); + } + + /** + * Constructs a new instance. + * + * @param root The struct vector to serialize to an {@link ArrowRecordBatch}. + * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch} + * @param codec the codec for compressing data. If it is null, then no compression is needed. + * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries. + */ + public StructVectorUnloader( + StructVector root, boolean includeNullCount, CompressionCodec codec, boolean alignBuffers) { + this.root = root; + this.includeNullCount = includeNullCount; + this.codec = codec; + this.alignBuffers = alignBuffers; + } + + /** + * Performs the depth first traversal of the Vectors to create an {@link ArrowRecordBatch} suitable + * for serialization. + */ + public ArrowRecordBatch getRecordBatch() { + List nodes = new ArrayList<>(); + List buffers = new ArrayList<>(); + for (FieldVector vector : root.getChildrenFromFields()) { + appendNodes(vector, nodes, buffers); + } + return new ArrowRecordBatch( + root.getValueCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), alignBuffers); + } + + private void appendNodes(FieldVector vector, List nodes, List buffers) { + nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? 
vector.getNullCount() : -1)); + List fieldBuffers = vector.getFieldBuffers(); + int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType()); + if (fieldBuffers.size() != expectedBufferCount) { + throw new IllegalArgumentException(String.format( + "wrong number of buffers for field %s in vector %s. found: %s", + vector.getField(), vector.getClass().getSimpleName(), fieldBuffers)); + } + for (ArrowBuf buf : fieldBuffers) { + buffers.add(codec.compress(vector.getAllocator(), buf)); + } + for (FieldVector child : vector.getChildrenFromFields()) { + appendNodes(child, nodes, buffers); + } + } +} diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/DictionaryTest.java b/java/ffi/src/test/java/org/apache/arrow/ffi/DictionaryTest.java new file mode 100644 index 00000000000..8a3e7dd05b7 --- /dev/null +++ b/java/ffi/src/test/java/org/apache/arrow/ffi/DictionaryTest.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.ffi; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.channels.Channels; +import java.util.Collections; + +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.util.AutoCloseables; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.compare.VectorEqualsVisitor; +import org.apache.arrow.vector.dictionary.Dictionary; +import org.apache.arrow.vector.dictionary.DictionaryEncoder; +import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class DictionaryTest { + private RootAllocator allocator = null; + + @BeforeEach + public void setUp() { + allocator = new RootAllocator(Long.MAX_VALUE); + } + + @AfterEach + public void tearDown() { + allocator.close(); + } + + void roundtrip(FieldVector vector, DictionaryProvider provider, Class clazz) { + // Consumer allocates empty structures + try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator); + ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) { + + // Producer creates structures from existing memory pointers + try (ArrowSchema arrowSchema = 
ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); + ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { + // Producer exports vector into the FFI structures + FFI.exportVector(allocator, vector, provider, arrowArray, arrowSchema); + } + + // Consumer imports vector + try (FFIDictionaryProvider ffiDictionaryProvider = new FFIDictionaryProvider(); + FieldVector imported = FFI.importVector(allocator, consumerArrowArray, consumerArrowSchema, + ffiDictionaryProvider);) { + assertTrue(clazz.isInstance(imported), String.format("expected %s but was %s", clazz, imported.getClass())); + assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported), "vectors are not equivalent"); + for (long id : ffiDictionaryProvider.getDictionaryIds()) { + ValueVector exportedDictionaryVector = provider.lookup(id).getVector(); + ValueVector importedDictionaryVector = ffiDictionaryProvider.lookup(id).getVector(); + assertTrue(VectorEqualsVisitor.vectorEquals(exportedDictionaryVector, importedDictionaryVector), + String.format("Dictionary vectors for ID %d are not equivalent", id)); + } + } + } + } + + @Test + public void testWithDictionary() throws Exception { + DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider(); + // create dictionary and provider + final VarCharVector dictVector = new VarCharVector("dict", allocator); + dictVector.allocateNewSafe(); + dictVector.setSafe(0, "aa".getBytes()); + dictVector.setSafe(1, "bb".getBytes()); + dictVector.setSafe(2, "cc".getBytes()); + dictVector.setValueCount(3); + + Dictionary dictionary = new Dictionary(dictVector, new DictionaryEncoding(1L, false, /* indexType= */null)); + provider.put(dictionary); + + // create vector and encode it + final VarCharVector vector = new VarCharVector("vector", allocator); + vector.allocateNewSafe(); + vector.setSafe(0, "bb".getBytes()); + vector.setSafe(1, "bb".getBytes()); + vector.setSafe(2, "cc".getBytes()); + vector.setSafe(3, "aa".getBytes()); + vector.setValueCount(4); + + // get the encoded vector + IntVector encodedVector = (IntVector) DictionaryEncoder.encode(vector, dictionary); + + // FFI roundtrip + roundtrip(encodedVector, provider, IntVector.class); + + // Close all + AutoCloseables.close((AutoCloseable) vector, encodedVector, dictVector); + } + + @Test + public void testRoundtripMultipleBatches() throws IOException { + try (ArrowStreamReader reader = createMultiBatchReader(); + ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator)) { + // Load first batch + reader.loadNextBatch(); + // Producer fills consumer schema stucture + FFI.exportSchema(allocator, reader.getVectorSchemaRoot().getSchema(), reader, consumerArrowSchema); + // Consumer loads it as an empty vector schema root + try (FFIDictionaryProvider consumerDictionaryProvider = new FFIDictionaryProvider(); + VectorSchemaRoot consumerRoot = FFI.importVectorSchemaRoot(allocator, consumerArrowSchema, + consumerDictionaryProvider)) { + do { + try (ArrowArray consumerArray = ArrowArray.allocateNew(allocator)) { + // Producer exports next data + FFI.exportVectorSchemaRoot(allocator, reader.getVectorSchemaRoot(), reader, consumerArray); + // Consumer loads next data + FFI.importIntoVectorSchemaRoot(allocator, consumerArray, consumerRoot, consumerDictionaryProvider); + + // Roundtrip validation + assertTrue(consumerRoot.equals(reader.getVectorSchemaRoot()), "vector schema roots are not equivalent"); + for (long id : consumerDictionaryProvider.getDictionaryIds()) { + ValueVector 
exportedDictionaryVector = reader.lookup(id).getVector(); + ValueVector importedDictionaryVector = consumerDictionaryProvider.lookup(id).getVector(); + assertTrue(VectorEqualsVisitor.vectorEquals(exportedDictionaryVector, importedDictionaryVector), + String.format("Dictionary vectors for ID %d are not equivalent", id)); + } + } + } + while (reader.loadNextBatch()); + } + } + } + + private ArrowStreamReader createMultiBatchReader() throws IOException { + ByteArrayOutputStream os = new ByteArrayOutputStream(); + try (final VarCharVector dictVector = new VarCharVector("dict", allocator); + IntVector vector = new IntVector("foo", allocator)) { + // create dictionary and provider + DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider(); + dictVector.allocateNewSafe(); + dictVector.setSafe(0, "aa".getBytes()); + dictVector.setSafe(1, "bb".getBytes()); + dictVector.setSafe(2, "cc".getBytes()); + dictVector.setSafe(3, "dd".getBytes()); + dictVector.setSafe(4, "ee".getBytes()); + dictVector.setValueCount(5); + Dictionary dictionary = new Dictionary(dictVector, new DictionaryEncoding(1L, false, /* indexType= */null)); + provider.put(dictionary); + + Schema schema = new Schema(Collections.singletonList(vector.getField())); + try ( + VectorSchemaRoot root = new VectorSchemaRoot(schema, Collections.singletonList(vector), + vector.getValueCount()); + ArrowStreamWriter writer = new ArrowStreamWriter(root, provider, Channels.newChannel(os));) { + + writer.start(); + + // Batch 1 + vector.setNull(0); + vector.setSafe(1, 1); + vector.setSafe(2, 2); + vector.setNull(3); + vector.setSafe(4, 1); + vector.setValueCount(5); + root.setRowCount(5); + writer.writeBatch(); + + // Batch 2 + vector.setNull(0); + vector.setSafe(1, 1); + vector.setSafe(2, 2); + vector.setValueCount(3); + root.setRowCount(3); + writer.writeBatch(); + + // Batch 3 + vector.setSafe(0, 0); + vector.setSafe(1, 1); + vector.setSafe(2, 2); + vector.setSafe(3, 3); + vector.setSafe(4, 4); + vector.setValueCount(5); + root.setRowCount(5); + writer.writeBatch(); + + writer.end(); + } + } + + ByteArrayInputStream in = new ByteArrayInputStream(os.toByteArray()); + return new ArrowStreamReader(in, allocator); + } + +} diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/FlagsTest.java b/java/ffi/src/test/java/org/apache/arrow/ffi/FlagsTest.java new file mode 100644 index 00000000000..8dad4ee7032 --- /dev/null +++ b/java/ffi/src/test/java/org/apache/arrow/ffi/FlagsTest.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.ffi; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.ArrayList; + +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.junit.jupiter.api.Test; + +public class FlagsTest { + @Test + public void testForFieldNullableOrderedDict() { + FieldType fieldType = new FieldType(true, ArrowType.Binary.INSTANCE, + new DictionaryEncoding(123L, true, new ArrowType.Int(8, true))); + + assertEquals(Flags.ARROW_FLAG_DICTIONARY_ORDERED | Flags.ARROW_FLAG_NULLABLE, + Flags.forField(new Field("Name", fieldType, new ArrayList<>()))); + } + + @Test + public void testForFieldOrderedDict() { + FieldType fieldType = new FieldType(false, ArrowType.Binary.INSTANCE, + new DictionaryEncoding(123L, true, new ArrowType.Int(8, true))); + assertEquals(Flags.ARROW_FLAG_DICTIONARY_ORDERED, Flags.forField(new Field("Name", fieldType, new ArrayList<>()))); + } + + @Test + public void testForFieldNullableDict() { + FieldType fieldType = new FieldType(true, ArrowType.Binary.INSTANCE, + new DictionaryEncoding(123L, false, new ArrowType.Int(8, true))); + assertEquals(Flags.ARROW_FLAG_NULLABLE, Flags.forField(new Field("Name", fieldType, new ArrayList<>()))); + } + + @Test + public void testForFieldNullable() { + FieldType fieldType = new FieldType(true, ArrowType.Binary.INSTANCE, null); + assertEquals(Flags.ARROW_FLAG_NULLABLE, Flags.forField(new Field("Name", fieldType, new ArrayList<>()))); + } + + @Test + public void testForFieldNullableOrderedSortedMap() { + ArrowType.Map type = new ArrowType.Map(true); + FieldType fieldType = new FieldType(true, type, new DictionaryEncoding(123L, true, new ArrowType.Int(8, true))); + assertEquals(Flags.ARROW_FLAG_DICTIONARY_ORDERED | Flags.ARROW_FLAG_NULLABLE | Flags.ARROW_FLAG_MAP_KEYS_SORTED, + Flags.forField(new Field("Name", fieldType, new ArrayList<>()))); + } + + @Test + public void testForFieldNullableOrderedMap() { + ArrowType.Map type = new ArrowType.Map(false); + FieldType fieldType = new FieldType(true, type, new DictionaryEncoding(123L, true, new ArrowType.Int(8, true))); + assertEquals(Flags.ARROW_FLAG_DICTIONARY_ORDERED | Flags.ARROW_FLAG_NULLABLE, + Flags.forField(new Field("Name", fieldType, new ArrayList<>()))); + } +} diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/FormatTest.java b/java/ffi/src/test/java/org/apache/arrow/ffi/FormatTest.java new file mode 100644 index 00000000000..40b5eff45ea --- /dev/null +++ b/java/ffi/src/test/java/org/apache/arrow/ffi/FormatTest.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.ffi; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.apache.arrow.vector.types.DateUnit; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.IntervalUnit; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.UnionMode; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.junit.jupiter.api.Test; + +public class FormatTest { + @Test + public void testAsString() { + assertEquals("z", Format.asString(new ArrowType.Binary())); + assertEquals("b", Format.asString(new ArrowType.Bool())); + assertEquals("tdD", Format.asString(new ArrowType.Date(DateUnit.DAY))); + assertEquals("tdm", Format.asString(new ArrowType.Date(DateUnit.MILLISECOND))); + assertEquals("d:1,1", Format.asString(new ArrowType.Decimal(1, 1, 128))); + assertEquals("d:1,1,1", Format.asString(new ArrowType.Decimal(1, 1, 1))); + assertEquals("d:9,1,1", Format.asString(new ArrowType.Decimal(9, 1, 1))); + assertEquals("tDs", Format.asString(new ArrowType.Duration(TimeUnit.SECOND))); + assertEquals("tDm", Format.asString(new ArrowType.Duration(TimeUnit.MILLISECOND))); + assertEquals("tDu", Format.asString(new ArrowType.Duration(TimeUnit.MICROSECOND))); + assertEquals("tDn", Format.asString(new ArrowType.Duration(TimeUnit.NANOSECOND))); + assertEquals("w:1", Format.asString(new ArrowType.FixedSizeBinary(1))); + assertEquals("+w:3", Format.asString(new ArrowType.FixedSizeList(3))); + assertEquals("e", Format.asString(new ArrowType.FloatingPoint(FloatingPointPrecision.HALF))); + assertEquals("f", Format.asString(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE))); + assertEquals("g", Format.asString(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE))); + assertEquals("c", Format.asString(new ArrowType.Int(Byte.SIZE, true))); + assertEquals("C", Format.asString(new ArrowType.Int(Byte.SIZE, false))); + assertEquals("s", Format.asString(new ArrowType.Int(Short.SIZE, true))); + assertEquals("S", Format.asString(new ArrowType.Int(Short.SIZE, false))); + assertEquals("i", Format.asString(new ArrowType.Int(Integer.SIZE, true))); + assertEquals("I", Format.asString(new ArrowType.Int(Integer.SIZE, false))); + assertEquals("l", Format.asString(new ArrowType.Int(Long.SIZE, true))); + assertEquals("L", Format.asString(new ArrowType.Int(Long.SIZE, false))); + assertEquals("tiD", Format.asString(new ArrowType.Interval(IntervalUnit.DAY_TIME))); + assertEquals("tiM", Format.asString(new ArrowType.Interval(IntervalUnit.YEAR_MONTH))); + assertEquals("Z", Format.asString(new ArrowType.LargeBinary())); + assertEquals("+L", Format.asString(new ArrowType.LargeList())); + assertEquals("U", Format.asString(new ArrowType.LargeUtf8())); + assertEquals("+l", Format.asString(new ArrowType.List())); + assertEquals("+m", Format.asString(new ArrowType.Map(true))); + assertEquals("n", Format.asString(new ArrowType.Null())); + assertEquals("+s", Format.asString(new ArrowType.Struct())); + assertEquals("tts", Format.asString(new ArrowType.Time(TimeUnit.SECOND, 32))); + assertEquals("ttm", Format.asString(new ArrowType.Time(TimeUnit.MILLISECOND, 32))); + assertEquals("ttu", Format.asString(new ArrowType.Time(TimeUnit.MICROSECOND, 64))); + assertEquals("ttn", Format.asString(new ArrowType.Time(TimeUnit.NANOSECOND, 64))); + assertEquals("tss:Timezone", Format.asString(new 
ArrowType.Timestamp(TimeUnit.SECOND, "Timezone"))); + assertEquals("tsm:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "Timezone"))); + assertEquals("tsu:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "Timezone"))); + assertEquals("tsn:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.NANOSECOND, "Timezone"))); + assertEquals("+us:1,1,1", + Format.asString(new ArrowType.Union(UnionMode.Sparse, new int[]{1, 1, 1}))); + assertEquals("+ud:1,1,1", + Format.asString(new ArrowType.Union(UnionMode.Dense, new int[]{1, 1, 1}))); + assertEquals("u", Format.asString(new ArrowType.Utf8())); + + assertThrows(UnsupportedOperationException.class, () -> + Format.asString(new ArrowType.Int(1, true))); + assertThrows(UnsupportedOperationException.class, () -> + Format.asString(new ArrowType.Time(TimeUnit.SECOND, 1))); + assertThrows(UnsupportedOperationException.class, () -> + Format.asString(new ArrowType.Time(TimeUnit.MILLISECOND, 64))); + } + + @Test + public void testAsType() throws IllegalStateException, NumberFormatException, UnsupportedOperationException { + assertTrue(Format.asType("n", 0L) instanceof ArrowType.Null); + assertTrue(Format.asType("b", 0L) instanceof ArrowType.Bool); + assertEquals(new ArrowType.Int(Byte.SIZE, true), Format.asType("c", 0L)); + assertEquals(new ArrowType.Int(Byte.SIZE, false), Format.asType("C", 0L)); + assertEquals(new ArrowType.Int(Short.SIZE, true), Format.asType("s", 0L)); + assertEquals(new ArrowType.Int(Short.SIZE, false), Format.asType("S", 0L)); + assertEquals(new ArrowType.Int(Integer.SIZE, true), Format.asType("i", 0L)); + assertEquals(new ArrowType.Int(Integer.SIZE, false), Format.asType("I", 0L)); + assertEquals(new ArrowType.Int(Long.SIZE, true), Format.asType("l", 0L)); + assertEquals(new ArrowType.Int(Long.SIZE, false), Format.asType("L", 0L)); + assertEquals(new ArrowType.FloatingPoint(FloatingPointPrecision.HALF), Format.asType("e", 0L)); + assertEquals(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), Format.asType("f", 0L)); + assertEquals(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), Format.asType("g", 0L)); + assertTrue(Format.asType("z", 0L) instanceof ArrowType.Binary); + assertTrue(Format.asType("Z", 0L) instanceof ArrowType.LargeBinary); + assertTrue(Format.asType("u", 0L) instanceof ArrowType.Utf8); + assertTrue(Format.asType("U", 0L) instanceof ArrowType.LargeUtf8); + assertEquals(new ArrowType.Date(DateUnit.DAY), Format.asType("tdD", 0L)); + assertEquals(new ArrowType.Date(DateUnit.MILLISECOND), Format.asType("tdm", 0L)); + assertEquals(new ArrowType.Time(TimeUnit.SECOND, Integer.SIZE), Format.asType("tts", 0L)); + assertEquals(new ArrowType.Time(TimeUnit.MILLISECOND, Integer.SIZE), Format.asType("ttm", 0L)); + assertEquals(new ArrowType.Time(TimeUnit.MICROSECOND, Long.SIZE), Format.asType("ttu", 0L)); + assertEquals(new ArrowType.Time(TimeUnit.NANOSECOND, Long.SIZE), Format.asType("ttn", 0L)); + assertEquals(new ArrowType.Duration(TimeUnit.SECOND), Format.asType("tDs", 0L)); + assertEquals(new ArrowType.Duration(TimeUnit.MILLISECOND), Format.asType("tDm", 0L)); + assertEquals(new ArrowType.Duration(TimeUnit.MICROSECOND), Format.asType("tDu", 0L)); + assertEquals(new ArrowType.Duration(TimeUnit.NANOSECOND), Format.asType("tDn", 0L)); + assertEquals(new ArrowType.Interval(IntervalUnit.YEAR_MONTH), Format.asType("tiM", 0L)); + assertEquals(new ArrowType.Interval(IntervalUnit.DAY_TIME), Format.asType("tiD", 0L)); + assertTrue(Format.asType("+l", 0L) instanceof 
ArrowType.List); + assertTrue(Format.asType("+L", 0L) instanceof ArrowType.LargeList); + assertTrue(Format.asType("+s", 0L) instanceof ArrowType.Struct); + assertEquals(new ArrowType.Map(false), Format.asType("+m", 0L)); + assertEquals(new ArrowType.Map(true), Format.asType("+m", Flags.ARROW_FLAG_MAP_KEYS_SORTED)); + assertEquals(new ArrowType.Decimal(1, 1, 128), Format.asType("d:1,1", 0L)); + assertEquals(new ArrowType.Decimal(1, 1, 1), Format.asType("d:1,1,1", 0L)); + assertEquals(new ArrowType.Decimal(9, 1, 1), Format.asType("d:9,1,1", 0L)); + assertEquals(new ArrowType.FixedSizeBinary(1), Format.asType("w:1", 0L)); + assertEquals(new ArrowType.FixedSizeList(3), Format.asType("+w:3", 0L)); + assertEquals(new ArrowType.Union(UnionMode.Dense, new int[]{1, 1, 1}), Format.asType("+ud:1,1,1", 0L)); + assertEquals(new ArrowType.Union(UnionMode.Sparse, new int[]{1, 1, 1}), Format.asType("+us:1,1,1", 0L)); + assertEquals(new ArrowType.Timestamp(TimeUnit.SECOND, "Timezone"), Format.asType("tss:Timezone", 0L)); + assertEquals(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "Timezone"), Format.asType("tsm:Timezone", 0L)); + assertEquals(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "Timezone"), Format.asType("tsu:Timezone", 0L)); + assertEquals(new ArrowType.Timestamp(TimeUnit.NANOSECOND, "Timezone"), Format.asType("tsn:Timezone", 0L)); + + assertThrows(UnsupportedOperationException.class, () -> Format.asType("Format", 0L)); + assertThrows(UnsupportedOperationException.class, () -> Format.asType(":", 0L)); + assertThrows(NumberFormatException.class, () -> Format.asType("w:1,2,3", 0L)); + } +} + diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/MetadataTest.java b/java/ffi/src/test/java/org/apache/arrow/ffi/MetadataTest.java new file mode 100644 index 00000000000..019f2d63a63 --- /dev/null +++ b/java/ffi/src/test/java/org/apache/arrow/ffi/MetadataTest.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.ffi; + +import static org.junit.jupiter.api.Assertions.*; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.HashMap; +import java.util.Map; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.memory.util.LargeMemoryUtil; +import org.apache.arrow.memory.util.MemoryUtil; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class MetadataTest { + private RootAllocator allocator = null; + + private static Map metadata; + private static byte[] encoded; + + @BeforeAll + static void beforeAll() { + metadata = new HashMap<>(); + metadata.put("key1", ""); + metadata.put("key2", "bar"); + + if (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN) { + encoded = new byte[] { 2, 0, 0, 0, 4, 0, 0, 0, 'k', 'e', 'y', '1', 0, 0, 0, 0, 4, 0, 0, 0, 'k', 'e', 'y', '2', 3, + 0, 0, 0, 'b', 'a', 'r' }; + } else { + encoded = new byte[] { 0, 0, 0, 2, 0, 0, 0, 4, 'k', 'e', 'y', '1', 0, 0, 0, 0, 0, 0, 0, 4, 'k', 'e', 'y', '2', 0, + 0, 0, 3, 'b', 'a', 'r' }; + } + } + + @BeforeEach + public void setUp() { + allocator = new RootAllocator(Long.MAX_VALUE); + } + + @AfterEach + public void tearDown() { + allocator.close(); + } + + @Test + public void testEncode() { + try (ArrowBuf buffer = Metadata.encode(allocator, metadata)) { + int totalSize = LargeMemoryUtil.checkedCastToInt(buffer.readableBytes()); + ByteBuffer reader = MemoryUtil.directBuffer(buffer.memoryAddress(), totalSize).order(ByteOrder.nativeOrder()); + byte[] result = new byte[totalSize]; + reader.get(result); + assertArrayEquals(encoded, result); + } + } + + @Test + public void testDecode() { + try (ArrowBuf buffer = allocator.buffer(31)) { + buffer.setBytes(0, encoded); + Map decoded = Metadata.decode(buffer.memoryAddress()); + assertNotNull(decoded); + assertEquals(metadata, decoded); + } + } + + @Test + public void testEncodeEmpty() { + Map metadata = new HashMap<>(); + try (ArrowBuf encoded = Metadata.encode(allocator, metadata)) { + assertNull(encoded); + } + } + + @Test + public void testDecodeEmpty() { + Map decoded = Metadata.decode(NativeUtil.NULL); + assertNull(decoded); + } + +} diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java b/java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java new file mode 100644 index 00000000000..600a8fb16bc --- /dev/null +++ b/java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java @@ -0,0 +1,665 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.ffi; + +import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DateMilliVector; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.DurationVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.IntervalDayVector; +import org.apache.arrow.vector.IntervalYearVector; +import org.apache.arrow.vector.LargeVarBinaryVector; +import org.apache.arrow.vector.LargeVarCharVector; +import org.apache.arrow.vector.NullVector; +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TimeMicroVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeNanoVector; +import org.apache.arrow.vector.TimeSecVector; +import org.apache.arrow.vector.TimeStampMicroTZVector; +import org.apache.arrow.vector.TimeStampMicroVector; +import org.apache.arrow.vector.TimeStampMilliTZVector; +import org.apache.arrow.vector.TimeStampMilliVector; +import org.apache.arrow.vector.TimeStampNanoTZVector; +import org.apache.arrow.vector.TimeStampNanoVector; +import org.apache.arrow.vector.TimeStampSecTZVector; +import org.apache.arrow.vector.TimeStampSecVector; +import org.apache.arrow.vector.TinyIntVector; +import org.apache.arrow.vector.UInt1Vector; +import org.apache.arrow.vector.UInt2Vector; +import org.apache.arrow.vector.UInt4Vector; +import org.apache.arrow.vector.UInt8Vector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.compare.VectorEqualsVisitor; +import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.LargeListVector; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.complex.UnionVector; +import org.apache.arrow.vector.complex.impl.UnionMapWriter; +import org.apache.arrow.vector.holders.IntervalDayHolder; +import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder; +import org.apache.arrow.vector.holders.NullableUInt4Holder; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; 
+import org.junit.jupiter.api.Test; + +public class RoundtripTest { + private static final String EMPTY_SCHEMA_PATH = ""; + private RootAllocator allocator = null; + + @BeforeEach + public void setUp() { + allocator = new RootAllocator(Long.MAX_VALUE); + } + + @AfterEach + public void tearDown() { + allocator.close(); + } + + FieldVector vectorRoundtrip(FieldVector vector) { + // Consumer allocates empty structures + try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator); + ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) { + + // Producer creates structures from existing memory pointers + try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); + ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { + // Producer exports vector into the FFI structures + FFI.exportVector(allocator, vector, null, arrowArray, arrowSchema); + } + + // Consumer imports vector + return FFI.importVector(allocator, consumerArrowArray, consumerArrowSchema, null); + } + } + + boolean roundtrip(FieldVector vector, Class clazz) { + try (ValueVector imported = vectorRoundtrip(vector)) { + assertTrue(clazz.isInstance(imported), String.format("expected %s but was %s", clazz, imported.getClass())); + return VectorEqualsVisitor.vectorEquals(vector, imported); + } + } + + @Test + public void testBitVector() { + BitVector imported; + + try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(1024); + vector.setValueCount(1024); + + // Put and set a few values + vector.set(0, 1); + vector.set(1, 0); + vector.set(100, 0); + vector.set(1022, 1); + + vector.setValueCount(1024); + + imported = (BitVector) vectorRoundtrip(vector); + assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported)); + } + + assertEquals(1, imported.get(0)); + assertEquals(0, imported.get(1)); + assertEquals(0, imported.get(100)); + assertEquals(1, imported.get(1022)); + assertEquals(1020, imported.getNullCount()); + imported.close(); + } + + @Test + public void testIntVector() { + IntVector imported; + try (final IntVector vector = new IntVector("v", allocator)) { + setVector(vector, 1, 2, 3, null); + imported = (IntVector) vectorRoundtrip(vector); + assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported)); + } + assertEquals(1, imported.get(0)); + assertEquals(2, imported.get(1)); + assertEquals(3, imported.get(2)); + assertEquals(4, imported.getValueCount()); + assertEquals(1, imported.getNullCount()); + imported.close(); + } + + @Test + public void testBigIntVector() { + BigIntVector imported; + try (final BigIntVector vector = new BigIntVector("v", allocator)) { + setVector(vector, 1L, 2L, 3L, null); + imported = (BigIntVector) vectorRoundtrip(vector); + assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported)); + } + assertEquals(1, imported.get(0)); + assertEquals(2, imported.get(1)); + assertEquals(3, imported.get(2)); + assertEquals(4, imported.getValueCount()); + assertEquals(1, imported.getNullCount()); + imported.close(); + } + + @Test + public void testDateDayVector() { + DateDayVector imported; + try (final DateDayVector vector = new DateDayVector("v", allocator)) { + setVector(vector, 1, 2, 3, null); + imported = (DateDayVector) vectorRoundtrip(vector); + assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported)); + } + assertEquals(1, imported.get(0)); + assertEquals(2, imported.get(1)); + assertEquals(3, imported.get(2)); + assertEquals(4, imported.getValueCount()); + assertEquals(1, 
imported.getNullCount()); + imported.close(); + } + + @Test + public void testDateMilliVector() { + DateMilliVector imported; + try (final DateMilliVector vector = new DateMilliVector("v", allocator)) { + setVector(vector, 1L, 2L, 3L, null); + imported = (DateMilliVector) vectorRoundtrip(vector); + assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported)); + } + assertEquals(1, imported.get(0)); + assertEquals(2, imported.get(1)); + assertEquals(3, imported.get(2)); + assertEquals(4, imported.getValueCount()); + assertEquals(1, imported.getNullCount()); + imported.close(); + } + + @Test + public void testDecimalVector() { + try (final DecimalVector vector = new DecimalVector("v", allocator, 1, 1)) { + setVector(vector, 1L, 2L, 3L, null); + assertTrue(roundtrip(vector, DecimalVector.class)); + } + } + + @Test + public void testDurationVector() { + for (TimeUnit unit : TimeUnit.values()) { + final FieldType fieldType = FieldType.nullable(new ArrowType.Duration(unit)); + try (final DurationVector vector = new DurationVector("v", fieldType, allocator)) { + setVector(vector, 1L, 2L, 3L, null); + assertTrue(roundtrip(vector, DurationVector.class)); + } + } + } + + @Test + public void testZeroVectorEquals() { + try (final ZeroVector vector = new ZeroVector()) { + // A ZeroVector is imported as a NullVector + assertTrue(roundtrip(vector, NullVector.class)); + } + } + + @Test + public void testFixedSizeBinaryVector() { + try (final FixedSizeBinaryVector vector = new FixedSizeBinaryVector("v", allocator, 2)) { + setVector(vector, new byte[] { 0b0000, 0b0001 }, new byte[] { 0b0010, 0b0011 }); + assertTrue(roundtrip(vector, FixedSizeBinaryVector.class)); + } + } + + @Test + public void testFloat4Vector() { + try (final Float4Vector vector = new Float4Vector("v", allocator)) { + setVector(vector, 0.1f, 0.2f, 0.3f, null); + assertTrue(roundtrip(vector, Float4Vector.class)); + } + } + + @Test + public void testFloat8Vector() { + try (final Float8Vector vector = new Float8Vector("v", allocator)) { + setVector(vector, 0.1d, 0.2d, 0.3d, null); + assertTrue(roundtrip(vector, Float8Vector.class)); + } + } + + @Test + public void testIntervalDayVector() { + try (final IntervalDayVector vector = new IntervalDayVector("v", allocator)) { + IntervalDayHolder value = new IntervalDayHolder(); + value.days = 5; + value.milliseconds = 100; + setVector(vector, value, null); + assertTrue(roundtrip(vector, IntervalDayVector.class)); + } + } + + @Test + public void testIntervalYearVector() { + try (final IntervalYearVector vector = new IntervalYearVector("v", allocator)) { + setVector(vector, 1990, 2000, 2010, 2020, null); + assertTrue(roundtrip(vector, IntervalYearVector.class)); + } + } + + @Test + public void testSmallIntVector() { + try (final SmallIntVector vector = new SmallIntVector("v", allocator)) { + setVector(vector, (short) 0, (short) 256, null); + assertTrue(roundtrip(vector, SmallIntVector.class)); + } + } + + @Test + public void testTimeMicroVector() { + try (final TimeMicroVector vector = new TimeMicroVector("v", allocator)) { + setVector(vector, 0L, 1L, 2L, 3L, null); + assertTrue(roundtrip(vector, TimeMicroVector.class)); + } + } + + @Test + public void testTimeMilliVector() { + try (final TimeMilliVector vector = new TimeMilliVector("v", allocator)) { + setVector(vector, 0, 1, 2, 3, null); + assertTrue(roundtrip(vector, TimeMilliVector.class)); + } + } + + @Test + public void testTimeNanoVector() { + try (final TimeNanoVector vector = new TimeNanoVector("v", allocator)) { + setVector(vector, 0L, 
1L, 2L, 3L, null); + assertTrue(roundtrip(vector, TimeNanoVector.class)); + } + } + + @Test + public void testTimeSecVector() { + try (final TimeSecVector vector = new TimeSecVector("v", allocator)) { + setVector(vector, 0, 1, 2, 3, null); + assertTrue(roundtrip(vector, TimeSecVector.class)); + } + } + + @Test + public void testTimeStampMicroTZVector() { + try (final TimeStampMicroTZVector vector = new TimeStampMicroTZVector("v", allocator, "UTC")) { + setVector(vector, 0L, 1L, 2L, 3L, null); + assertTrue(roundtrip(vector, TimeStampMicroTZVector.class)); + } + } + + @Test + public void testTimeStampMicroVector() { + try (final TimeStampMicroVector vector = new TimeStampMicroVector("v", allocator)) { + setVector(vector, 0L, 1L, 2L, 3L, null); + assertTrue(roundtrip(vector, TimeStampMicroVector.class)); + } + } + + @Test + public void testTimeStampMilliTZVector() { + try (final TimeStampMilliTZVector vector = new TimeStampMilliTZVector("v", allocator, "UTC")) { + setVector(vector, 0L, 1L, 2L, 3L, null); + assertTrue(roundtrip(vector, TimeStampMilliTZVector.class)); + } + } + + @Test + public void testTimeStampMilliVector() { + try (final TimeStampMilliVector vector = new TimeStampMilliVector("v", allocator)) { + setVector(vector, 0L, 1L, 2L, 3L, null); + assertTrue(roundtrip(vector, TimeStampMilliVector.class)); + } + } + + @Test + public void testTimeTimeStampNanoTZVector() { + try (final TimeStampNanoTZVector vector = new TimeStampNanoTZVector("v", allocator, "UTC")) { + setVector(vector, 0L, 1L, 2L, 3L, null); + assertTrue(roundtrip(vector, TimeStampNanoTZVector.class)); + } + } + + @Test + public void testTimeStampNanoVector() { + try (final TimeStampNanoVector vector = new TimeStampNanoVector("v", allocator)) { + setVector(vector, 0L, 1L, 2L, 3L, null); + assertTrue(roundtrip(vector, TimeStampNanoVector.class)); + } + } + + @Test + public void testTimeStampSecTZVector() { + try (final TimeStampSecTZVector vector = new TimeStampSecTZVector("v", allocator, "UTC")) { + setVector(vector, 0L, 1L, 2L, 3L, null); + assertTrue(roundtrip(vector, TimeStampSecTZVector.class)); + } + } + + @Test + public void testTimeStampSecVector() { + try (final TimeStampSecVector vector = new TimeStampSecVector("v", allocator)) { + setVector(vector, 0L, 1L, 2L, 3L, null); + assertTrue(roundtrip(vector, TimeStampSecVector.class)); + } + } + + @Test + public void testTinyIntVector() { + try (final TinyIntVector vector = new TinyIntVector("v", allocator)) { + setVector(vector, (byte) 0, (byte) 1, null); + assertTrue(roundtrip(vector, TinyIntVector.class)); + } + } + + @Test + public void testUInt1Vector() { + try (final UInt1Vector vector = new UInt1Vector("v", allocator)) { + setVector(vector, (byte) 0, (byte) 1, null); + assertTrue(roundtrip(vector, UInt1Vector.class)); + } + } + + @Test + public void testUInt2Vector() { + try (final UInt2Vector vector = new UInt2Vector("v", allocator)) { + setVector(vector, '0', '1', null); + assertTrue(roundtrip(vector, UInt2Vector.class)); + } + } + + @Test + public void testUInt4Vector() { + try (final UInt4Vector vector = new UInt4Vector("v", allocator)) { + setVector(vector, 0, 1, null); + assertTrue(roundtrip(vector, UInt4Vector.class)); + } + } + + @Test + public void testUInt8Vector() { + try (final UInt8Vector vector = new UInt8Vector("v", allocator)) { + setVector(vector, 0L, 1L, null); + assertTrue(roundtrip(vector, UInt8Vector.class)); + } + } + + @Test + public void testVarBinaryVector() { + try (final VarBinaryVector vector = new VarBinaryVector("v", allocator)) { 
+ setVector(vector, "abc".getBytes(), "def".getBytes(), null); + assertTrue(roundtrip(vector, VarBinaryVector.class)); + } + } + + @Test + public void testVarCharVector() { + try (final VarCharVector vector = new VarCharVector("v", allocator)) { + setVector(vector, "abc", "def", null); + assertTrue(roundtrip(vector, VarCharVector.class)); + } + } + + @Test + public void testLargeVarBinaryVector() { + try (final LargeVarBinaryVector vector = new LargeVarBinaryVector("", allocator)) { + vector.allocateNew(5, 1); + + NullableLargeVarBinaryHolder nullHolder = new NullableLargeVarBinaryHolder(); + nullHolder.isSet = 0; + + NullableLargeVarBinaryHolder binHolder = new NullableLargeVarBinaryHolder(); + binHolder.isSet = 1; + + String str = "hello world"; + try (ArrowBuf buf = allocator.buffer(16)) { + buf.setBytes(0, str.getBytes()); + binHolder.start = 0; + binHolder.end = str.length(); + binHolder.buffer = buf; + vector.setSafe(0, binHolder); + vector.setSafe(1, nullHolder); + + assertTrue(roundtrip(vector, LargeVarBinaryVector.class)); + } + } + } + + @Test + public void testLargeVarCharVector() { + try (final LargeVarCharVector vector = new LargeVarCharVector("v", allocator)) { + setVector(vector, "abc", "def", null); + assertTrue(roundtrip(vector, LargeVarCharVector.class)); + } + } + + @Test + public void testListVector() { + try (final ListVector vector = ListVector.empty("v", allocator)) { + setVector(vector, Arrays.stream(new int[] { 1, 2 }).boxed().collect(Collectors.toList()), + Arrays.stream(new int[] { 3, 4 }).boxed().collect(Collectors.toList()), new ArrayList()); + assertTrue(roundtrip(vector, ListVector.class)); + } + } + + @Test + public void testLargeListVector() { + try (final LargeListVector vector = LargeListVector.empty("v", allocator)) { + setVector(vector, Arrays.stream(new int[] { 1, 2 }).boxed().collect(Collectors.toList()), + Arrays.stream(new int[] { 3, 4 }).boxed().collect(Collectors.toList()), new ArrayList()); + assertTrue(roundtrip(vector, LargeListVector.class)); + } + } + + @Test + public void testFixedSizeListVector() { + try (final FixedSizeListVector vector = FixedSizeListVector.empty("v", 2, allocator)) { + setVector(vector, Arrays.stream(new int[] { 1, 2 }).boxed().collect(Collectors.toList()), + Arrays.stream(new int[] { 3, 4 }).boxed().collect(Collectors.toList())); + assertTrue(roundtrip(vector, FixedSizeListVector.class)); + } + } + + @Test + public void testMapVector() { + int count = 5; + try (final MapVector vector = MapVector.empty("v", allocator, false)) { + vector.allocateNew(); + UnionMapWriter mapWriter = vector.getWriter(); + for (int i = 0; i < count; i++) { + mapWriter.startMap(); + for (int j = 0; j < i + 1; j++) { + mapWriter.startEntry(); + mapWriter.key().bigInt().writeBigInt(j); + mapWriter.value().integer().writeInt(j); + mapWriter.endEntry(); + } + mapWriter.endMap(); + } + mapWriter.setValueCount(count); + + assertTrue(roundtrip(vector, MapVector.class)); + } + } + + @Test + public void testUnionVector() { + + final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); + uInt4Holder.value = 100; + uInt4Holder.isSet = 1; + + try (UnionVector vector = UnionVector.empty("v", allocator)) { + vector.allocateNew(); + + // write some data + vector.setType(0, MinorType.UINT4); + vector.setSafe(0, uInt4Holder); + vector.setType(2, MinorType.UINT4); + vector.setSafe(2, uInt4Holder); + vector.setValueCount(4); + + assertTrue(roundtrip(vector, UnionVector.class)); + } + } + + @Test + public void testStructVector() { + try (final 
StructVector vector = StructVector.empty("v", allocator)) { + Map<String, List<Integer>> data = new HashMap<>(); + data.put("col_1", Arrays.stream(new int[] { 1, 2 }).boxed().collect(Collectors.toList())); + data.put("col_2", Arrays.stream(new int[] { 3, 4 }).boxed().collect(Collectors.toList())); + setVector(vector, data); + assertTrue(roundtrip(vector, StructVector.class)); + } + } + + @Test + public void testVectorSchemaRoot() { + VectorSchemaRoot imported; + + // Consumer allocates empty structures + try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator); + ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) { + try (VectorSchemaRoot vsr = createTestVSR()) { + // Producer creates structures from existing memory pointers + try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); + ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { + // Producer exports vector into the FFI structures + FFI.exportVectorSchemaRoot(allocator, vsr, null, arrowArray, arrowSchema); + } + } + // Consumer imports vector + imported = FFI.importVectorSchemaRoot(allocator, consumerArrowSchema, consumerArrowArray, null); + } + + // Ensure that imported VectorSchemaRoot is valid even after FFI structures + // are closed + try (VectorSchemaRoot original = createTestVSR()) { + assertTrue(imported.equals(original)); + } + imported.close(); + } + + @Test + public void testSchema() { + Field decimalField = new Field("inner1", FieldType.nullable(new ArrowType.Decimal(19, 4, 128)), null); + Field strField = new Field("inner2", FieldType.nullable(new ArrowType.Utf8()), null); + Field itemField = new Field("col1", FieldType.nullable(new ArrowType.Struct()), + Arrays.asList(decimalField, strField)); + Field intField = new Field("col2", FieldType.nullable(new ArrowType.Int(32, true)), null); + Schema schema = new Schema(Arrays.asList(itemField, intField)); + // Consumer allocates empty ArrowSchema + try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator)) { + // Producer fills the schema with data + try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress())) { + FFI.exportSchema(allocator, schema, null, arrowSchema); + } + // Consumer imports schema + Schema importedSchema = FFI.importSchema(allocator, consumerArrowSchema, null); + assertEquals(schema.toJson(), importedSchema.toJson()); + } + } + + @Test + public void testImportReleasedArray() { + // Consumer allocates empty structures + try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator); + ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) { + // Producer creates structures from existing memory pointers + try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); + ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { + // Producer exports vector into the FFI structures + try (final NullVector vector = new NullVector()) { + FFI.exportVector(allocator, vector, null, arrowArray, arrowSchema); + } + } + + // Release array structure + consumerArrowArray.markReleased(); + + // Consumer tries to import the vector but fails + Exception e = assertThrows(IllegalStateException.class, () -> { + FFI.importVector(allocator, consumerArrowArray, consumerArrowSchema, null); + }); + + assertEquals("Cannot import released ArrowArray", e.getMessage()); + } + } + + private VectorSchemaRoot createTestVSR() { + BitVector bitVector = new BitVector("boolean", allocator); + + Map<String, String> metadata = new HashMap<>();
+ metadata.put("key", "value"); + FieldType fieldType = new FieldType(true, ArrowType.Utf8.INSTANCE, null, metadata); + VarCharVector varCharVector = new VarCharVector("varchar", fieldType, allocator); + + bitVector.allocateNew(); + varCharVector.allocateNew(); + for (int i = 0; i < 10; i++) { + bitVector.setSafe(i, i % 2 == 0 ? 0 : 1); + varCharVector.setSafe(i, ("test" + i).getBytes(StandardCharsets.UTF_8)); + } + bitVector.setValueCount(10); + varCharVector.setValueCount(10); + + List fields = Arrays.asList(bitVector.getField(), varCharVector.getField()); + List vectors = Arrays.asList(bitVector, varCharVector); + + return new VectorSchemaRoot(fields, vectors); + } + +} diff --git a/java/pom.xml b/java/pom.xml index cfea1195533..ddc3822f549 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -696,6 +696,14 @@ + + + arrow-ffi + + ffi + + + arrow-jni From 88a45768a51bdd23677659b27c90286f14fbb53e Mon Sep 17 00:00:00 2001 From: roee88 Date: Thu, 2 Sep 2021 23:14:07 +0300 Subject: [PATCH 02/21] Added ffi package to dev/release Signed-off-by: roee88 --- dev/release/01-prepare-test.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index 098e7f47d69..ec9730bf88c 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -213,6 +213,9 @@ def test_version_pre_tag { hunks: [["- #{@snapshot_version}", "+ #{@release_version}"]], path: "java/dataset/pom.xml" }, + { hunks: [["- #{@snapshot_version}", + "+ #{@release_version}"]], + path: "java/ffi/pom.xml" }, { hunks: [["- #{@snapshot_version}", "+ #{@release_version}"]], path: "java/flight/flight-core/pom.xml" }, @@ -411,6 +414,9 @@ def test_version_post_tag { hunks: [["- #{@snapshot_version}", "+ #{@next_snapshot_version}"]], path: "java/dataset/pom.xml" }, + { hunks: [["- #{@snapshot_version}", + "+ #{@next_snapshot_version}"]], + path: "java/ffi/pom.xml" }, { hunks: [["- #{@snapshot_version}", "+ #{@next_snapshot_version}"]], path: "java/flight/flight-core/pom.xml" }, From b0121b6ffef3cc2c4c867bae5f4f71abb512c415 Mon Sep 17 00:00:00 2001 From: roee88 Date: Sun, 5 Sep 2021 16:38:00 +0300 Subject: [PATCH 03/21] Add support for extension types Signed-off-by: roee88 --- .../java/org/apache/arrow/ffi/Format.java | 5 + .../org/apache/arrow/ffi/SchemaImporter.java | 18 +++ .../org/apache/arrow/ffi/RoundtripTest.java | 132 +++++++++++++++++- 3 files changed, 154 insertions(+), 1 deletion(-) diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/Format.java b/java/ffi/src/main/java/org/apache/arrow/ffi/Format.java index cdc312c47f5..5f98f73c3fa 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/Format.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/Format.java @@ -27,6 +27,7 @@ import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.UnionMode; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; /** * Conversion between {@link ArrowType} and string formats, as per C data @@ -38,6 +39,10 @@ private Format() { } static String asString(ArrowType arrowType) { + if (arrowType instanceof ExtensionType) { + arrowType = ((ExtensionType) arrowType).storageType(); + } + switch (arrowType.getTypeID()) { case Binary: return "z"; diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java b/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java index 89b09379fd2..e7565d607ce 100644 --- 
a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java @@ -29,7 +29,11 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.dictionary.Dictionary; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; @@ -37,6 +41,8 @@ * Importer for {@link ArrowSchema}. */ final class SchemaImporter { + private static final Logger logger = LoggerFactory.getLogger(SchemaImporter.class); + private static final int MAX_IMPORT_RECURSION_LEVEL = 64; private long nextDictionaryID = 1L; @@ -64,6 +70,18 @@ private Field importField(ArrowSchema schema, FFIDictionaryProvider provider, boolean nullable = (snapshot.flags & Flags.ARROW_FLAG_NULLABLE) != 0; Map metadata = Metadata.decode(snapshot.metadata); + if (metadata != null && metadata.containsKey(ExtensionType.EXTENSION_METADATA_KEY_NAME)) { + final String extensionName = metadata.get(ExtensionType.EXTENSION_METADATA_KEY_NAME); + final String extensionMetadata = metadata.getOrDefault(ExtensionType.EXTENSION_METADATA_KEY_METADATA, ""); + ExtensionType extensionType = ExtensionTypeRegistry.lookup(extensionName); + if (extensionType != null) { + arrowType = extensionType.deserialize(arrowType, extensionMetadata); + } else { + // Otherwise, we haven't registered the type + logger.info("Unrecognized extension type: {}", extensionName); + } + } + // Handle dictionary encoded vectors DictionaryEncoding dictionaryEncoding = null; if (snapshot.dictionary != NULL && provider != null) { diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java b/java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java index 600a8fb16bc..07fa18e33eb 100644 --- a/java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java +++ b/java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java @@ -22,22 +22,28 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.UUID; import java.util.stream.Collectors; import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.DateDayVector; import org.apache.arrow.vector.DateMilliVector; import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.DurationVector; +import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.FixedSizeBinaryVector; import org.apache.arrow.vector.Float4Vector; @@ -85,6 +91,8 @@ import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; +import 
org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; @@ -123,6 +131,23 @@ FieldVector vectorRoundtrip(FieldVector vector) { } } + VectorSchemaRoot vectorSchemaRootRoundtrip(VectorSchemaRoot root) { + // Consumer allocates empty structures + try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator); + ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) { + + // Producer creates structures from existing memory pointers + try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); + ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { + // Producer exports vector into the FFI structures + FFI.exportVectorSchemaRoot(allocator, root, null, arrowArray, arrowSchema); + } + + // Consumer imports vector + return FFI.importVectorSchemaRoot(allocator, consumerArrowSchema, consumerArrowArray, null); + } + } + boolean roundtrip(FieldVector vector, Class clazz) { try (ValueVector imported = vectorRoundtrip(vector)) { assertTrue(clazz.isInstance(imported), String.format("expected %s but was %s", clazz, imported.getClass())); @@ -536,7 +561,6 @@ public void testMapVector() { @Test public void testUnionVector() { - final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); uInt4Holder.value = 100; uInt4Holder.isSet = 1; @@ -566,6 +590,47 @@ public void testStructVector() { } } + @Test + public void testExtensionTypeVector() { + ExtensionTypeRegistry.register(new UuidType()); + final Schema schema = new Schema(Collections.singletonList(Field.nullable("a", new UuidType()))); + try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + // Fill with data + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + UuidVector vector = (UuidVector) root.getVector("a"); + vector.setValueCount(3); + vector.set(0, u1); + vector.set(1, u2); + vector.set(2, null); + root.setRowCount(3); + + // Roundtrip (export + import) + VectorSchemaRoot importedRoot = vectorSchemaRootRoundtrip(root); + + // Verify correctness + assertEquals(root.getSchema(), importedRoot.getSchema()); + + final Field field = importedRoot.getSchema().getFields().get(0); + final UuidType expectedType = new UuidType(); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), + expectedType.extensionName()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), + expectedType.serialize()); + + final UuidVector deserialized = (UuidVector) importedRoot.getFieldVectors().get(0); + assertEquals(vector.getValueCount(), deserialized.getValueCount()); + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(vector.isNull(i), deserialized.isNull(i)); + if (!vector.isNull(i)) { + assertEquals(vector.getObject(i), deserialized.getObject(i)); + } + } + + importedRoot.close(); + } + } + @Test public void testVectorSchemaRoot() { VectorSchemaRoot imported; @@ -662,4 +727,69 @@ private VectorSchemaRoot createTestVSR() { return new VectorSchemaRoot(fields, vectors); } + static class UuidType extends ExtensionType { + + @Override + public ArrowType storageType() { + return new ArrowType.FixedSizeBinary(16); + } + + @Override + public String extensionName() { + return "uuid"; + } + + @Override + public boolean extensionEquals(ExtensionType other) { + return other instanceof UuidType; + } + + @Override + public ArrowType 
deserialize(ArrowType storageType, String serializedData) { + if (!storageType.equals(storageType())) { + throw new UnsupportedOperationException("Cannot construct UuidType from underlying type " + storageType); + } + return new UuidType(); + } + + @Override + public String serialize() { + return ""; + } + + @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { + return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); + } + } + + static class UuidVector extends ExtensionTypeVector { + + public UuidVector(String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { + super(name, allocator, underlyingVector); + } + + @Override + public UUID getObject(int index) { + final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); + return new UUID(bb.getLong(), bb.getLong()); + } + + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + return getUnderlyingVector().hashCode(index, hasher); + } + + public void set(int index, UUID uuid) { + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + getUnderlyingVector().set(index, bb.array()); + } + } } From bc84be3fc3400b9ecc7c97092a86c54bfe1b4009 Mon Sep 17 00:00:00 2001 From: roee88 Date: Mon, 6 Sep 2021 15:18:06 +0300 Subject: [PATCH 04/21] style fixes Signed-off-by: roee88 --- java/ffi/README.md | 2 +- java/ffi/pom.xml | 2 +- java/ffi/src/main/cpp/jni_wrapper.cc | 13 ++++++------- .../java/org/apache/arrow/ffi/SchemaImporter.java | 4 ++-- .../java/org/apache/arrow/ffi/RoundtripTest.java | 7 +++---- 5 files changed, 13 insertions(+), 15 deletions(-) diff --git a/java/ffi/README.md b/java/ffi/README.md index c0dfee544b5..c2d30247077 100644 --- a/java/ffi/README.md +++ b/java/ffi/README.md @@ -24,7 +24,7 @@ install: - Java 8 or later - Maven 3.3 or later - - A C++ compiler + - A C++11-enabled compiler - CMake 3.11 or later - Make or ninja build utilities diff --git a/java/ffi/pom.xml b/java/ffi/pom.xml index fef334445ef..a9413eab665 100644 --- a/java/ffi/pom.xml +++ b/java/ffi/pom.xml @@ -50,7 +50,7 @@ org.apache.arrow - arrow-memory-netty + arrow-memory-unsafe ${project.version} test diff --git a/java/ffi/src/main/cpp/jni_wrapper.cc b/java/ffi/src/main/cpp/jni_wrapper.cc index c38e70b832f..872c79bce0f 100644 --- a/java/ffi/src/main/cpp/jni_wrapper.cc +++ b/java/ffi/src/main/cpp/jni_wrapper.cc @@ -16,13 +16,12 @@ // under the License. 
#include -#include + #include -#include "abi.h" -#include +#include #include -#include +#include "abi.h" #include "org_apache_arrow_ffi_jni_JniWrapper.h" namespace @@ -48,7 +47,7 @@ namespace class JniPendingException : public std::runtime_error { public: - explicit JniPendingException(const std::string &arg) : runtime_error(arg) {} + explicit JniPendingException(const std::string &arg) : std::runtime_error(arg) {} }; void ThrowPendingException(const std::string &message) @@ -63,7 +62,7 @@ namespace jmethodID ret = env->GetMethodID(this_class, name, sig); if (ret == nullptr) { std::string error_message = "Unable to find method " + std::string(name) + - " within signature" + std::string(sig); + " within signature " + std::string(sig); ThrowPendingException(error_message); } return ret; @@ -236,6 +235,6 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_exportArray( jobject private_data_ref = env->NewGlobalRef(private_data); array->private_data = new InnerPrivateData(vm, private_data_ref); - array->release = &release_exported; + array->release = &release_exported; JNI_METHOD_END() } diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java b/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java index e7565d607ce..8bb06104e74 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java @@ -30,12 +30,12 @@ import org.apache.arrow.vector.dictionary.Dictionary; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Importer for {@link ArrowSchema}. 
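Between the importer change above and the test updates below, here is a condensed usage sketch of the extension-type support introduced in this patch set: the consumer must register the extension type with `ExtensionTypeRegistry` before importing, otherwise the importer only sees the storage type. This sketch is not part of the patch; the class name `ExtensionRoundtripSketch`, the `main` wrapper, and the reuse of `UuidType` (the test helper defined inside `RoundtripTest`) are illustrative assumptions, while the `FFI`, `ArrowSchema`, `ArrowArray`, and `VectorSchemaRoot` calls mirror the ones exercised by the tests.

```java
import java.util.Collections;

import org.apache.arrow.ffi.ArrowArray;
import org.apache.arrow.ffi.ArrowSchema;
import org.apache.arrow.ffi.FFI;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;

public class ExtensionRoundtripSketch {
  public static void main(String[] args) {
    // UuidType stands in for a user-defined ExtensionType (see the test class in this patch).
    // Registering it lets SchemaImporter map the exported extension metadata back to the type.
    ExtensionTypeRegistry.register(new UuidType());
    try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
         ArrowSchema consumerSchema = ArrowSchema.allocateNew(allocator);
         ArrowArray consumerArray = ArrowArray.allocateNew(allocator);
         VectorSchemaRoot root = VectorSchemaRoot.create(
             new Schema(Collections.singletonList(Field.nullable("a", new UuidType()))), allocator)) {
      // Producer side: export the root through the C Data Interface structures
      try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerSchema.memoryAddress());
           ArrowArray arrowArray = ArrowArray.wrap(consumerArray.memoryAddress())) {
        FFI.exportVectorSchemaRoot(allocator, root, /* dictionaryProvider= */ null, arrowArray, arrowSchema);
      }
      // Consumer side: because UuidType is registered, the imported field keeps its extension type
      try (VectorSchemaRoot imported = FFI.importVectorSchemaRoot(
               allocator, consumerSchema, consumerArray, /* dictionaryProvider= */ null)) {
        System.out.println(imported.getSchema());
      }
    }
  }
}
```

If the extension name has not been registered, the importer added in this patch logs the unrecognized name and falls back to the underlying storage type.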
diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java b/java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java index 07fa18e33eb..03a52e047f0 100644 --- a/java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java +++ b/java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java @@ -91,8 +91,8 @@ import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; +import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; @@ -599,11 +599,10 @@ public void testExtensionTypeVector() { UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); UuidVector vector = (UuidVector) root.getVector("a"); - vector.setValueCount(3); + vector.setValueCount(2); vector.set(0, u1); vector.set(1, u2); - vector.set(2, null); - root.setRowCount(3); + root.setRowCount(2); // Roundtrip (export + import) VectorSchemaRoot importedRoot = vectorSchemaRootRoundtrip(root); From 3f9b2b4ae684b3c70a624d3b3c1a9383d80eb844 Mon Sep 17 00:00:00 2001 From: roee88 Date: Thu, 9 Sep 2021 10:51:24 +0300 Subject: [PATCH 05/21] address review comments Signed-off-by: roee88 --- java/ffi/.gitignore | 1 + java/ffi/README.md | 8 ++-- java/ffi/pom.xml | 2 +- java/ffi/src/main/cpp/jni_wrapper.cc | 43 +++++++++++++++++-- .../org/apache/arrow/ffi/ArrayImporter.java | 2 +- .../apache/arrow/ffi/FFIReferenceManager.java | 21 +++++---- .../java/org/apache/arrow/ffi/Format.java | 3 +- .../org/apache/arrow/ffi/SchemaImporter.java | 2 +- 8 files changed, 62 insertions(+), 20 deletions(-) create mode 100644 java/ffi/.gitignore diff --git a/java/ffi/.gitignore b/java/ffi/.gitignore new file mode 100644 index 00000000000..e390b124a6b --- /dev/null +++ b/java/ffi/.gitignore @@ -0,0 +1 @@ +build/** \ No newline at end of file diff --git a/java/ffi/README.md b/java/ffi/README.md index c2d30247077..b3c6b93ffed 100644 --- a/java/ffi/README.md +++ b/java/ffi/README.md @@ -31,9 +31,9 @@ install: ## Building JNI wrapper shared library ``` -mkdir -p ./target/build/ -pushd ./target/build/ -cmake ../.. +mkdir -p build +pushd build +cmake .. 
make popd ``` @@ -42,6 +42,8 @@ To use ninja, pass `-GNinja` when calling cmake and then use the `ninja` command ## Building and running tests +To build Apache Arrow (Java) with this module enabled run the following from the project root directory: + ``` cd java mvn -Parrow-ffi install diff --git a/java/ffi/pom.xml b/java/ffi/pom.xml index a9413eab665..1b021559ad3 100644 --- a/java/ffi/pom.xml +++ b/java/ffi/pom.xml @@ -24,7 +24,7 @@ Java implementation of C Data Interface jar - ./target/build + ./build diff --git a/java/ffi/src/main/cpp/jni_wrapper.cc b/java/ffi/src/main/cpp/jni_wrapper.cc index 872c79bce0f..f5559ca6708 100644 --- a/java/ffi/src/main/cpp/jni_wrapper.cc +++ b/java/ffi/src/main/cpp/jni_wrapper.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include "abi.h" @@ -78,6 +79,40 @@ namespace jobject j_private_data_; }; + class JNIEnvGuard + { + public: + explicit JNIEnvGuard(JavaVM* vm) : vm_(vm), should_deattach_(false) { + JNIEnv* env; + jint code = vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); + if (code == JNI_EDETACHED) { + JavaVMAttachArgs args; + args.version = JNI_VERSION; + args.name = NULL; + args.group = NULL; + code = vm->AttachCurrentThread(reinterpret_cast(&env), &args); + should_deattach_ = (code == JNI_OK); + } + if (code != JNI_OK) { + ThrowPendingException("Failed to attach the current thread to a Java VM"); + } + env_ = env; + } + + JNIEnv* env() { return env_; } + + ~JNIEnvGuard() { + if (should_deattach_) { + vm_->DetachCurrentThread(); + should_deattach_ = false; + } + } + private: + bool should_deattach_; + JavaVM* vm_; + JNIEnv* env_; + }; + template void release_exported(T* base) { // This should not be called on already released structure @@ -101,10 +136,10 @@ namespace // Release all data directly owned by the struct InnerPrivateData* private_data = reinterpret_cast(base->private_data); - JNIEnv* env; - if (private_data->vm_->GetEnv(reinterpret_cast(&env), JNI_VERSION) != JNI_OK) { - ThrowPendingException("JNIEnv was not attached to current thread"); - } + + JNIEnvGuard guard(private_data->vm_); + JNIEnv* env = guard.env(); + env->CallObjectMethod(private_data->j_private_data_, private_data_close_method); if (env->ExceptionCheck()) { env->ExceptionDescribe(); diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java index f93b8b532f6..6871a5e7c0b 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java @@ -81,7 +81,7 @@ private void importChild(ArrayImporter parent, ArrowArray src) { ArrowArray.Snapshot snapshot = src.snapshot(); checkState(snapshot.release != NULL, "Cannot import released ArrowArray"); recursionLevel = parent.recursionLevel + 1; - checkState(recursionLevel < MAX_IMPORT_RECURSION_LEVEL, "Recursion level in ArrowArray struct exceeded"); + checkState(recursionLevel <= MAX_IMPORT_RECURSION_LEVEL, "Recursion level in ArrowArray struct exceeded"); // Child buffers will keep the entire parent import alive. // Perhaps we can move the child structs on import, // but that is another level of complication. 
diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java b/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java index c52b73b378c..858e31eb721 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java @@ -54,18 +54,17 @@ public boolean release() { public boolean release(int decrement) { Preconditions.checkState(decrement >= 1, "ref count decrement should be greater than or equal to 1"); // decrement the ref count - final int refCnt; - synchronized (this) { - refCnt = bufRefCnt.addAndGet(-decrement); - if (refCnt == 0) { - // refcount of this reference manager has dropped to 0 - // release the underlying memory + final int refCnt = bufRefCnt.addAndGet(-decrement); + // the new ref count should be >= 0 + Preconditions.checkState(refCnt >= 0, "RefCnt has gone negative"); + if (refCnt == 0) { + // refcount of this reference manager has dropped to 0 + // release the underlying memory + synchronized (this) { struct.release(); struct.close(); } } - // the new ref count should be >= 0 - Preconditions.checkState(refCnt >= 0, "RefCnt has gone negative"); return refCnt == 0; } @@ -83,7 +82,11 @@ public void retain(int increment) { @Override public ArrowBuf retain(ArrowBuf srcBuffer, BufferAllocator targetAllocator) { retain(); - return srcBuffer; + + ArrowBuf targetArrowBuf = this.deriveBuffer(srcBuffer, 0, srcBuffer.capacity()); + targetArrowBuf.readerIndex(srcBuffer.readerIndex()); + targetArrowBuf.writerIndex(srcBuffer.writerIndex()); + return targetArrowBuf; } @Override diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/Format.java b/java/ffi/src/main/java/org/apache/arrow/ffi/Format.java index 5f98f73c3fa..087608b5f54 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/Format.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/Format.java @@ -40,7 +40,8 @@ private Format() { static String asString(ArrowType arrowType) { if (arrowType instanceof ExtensionType) { - arrowType = ((ExtensionType) arrowType).storageType(); + ArrowType innerType = ((ExtensionType) arrowType).storageType(); + return asString(innerType); } switch (arrowType.getTypeID()) { diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java b/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java index 8bb06104e74..49ac82ed00c 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java @@ -58,7 +58,7 @@ Field importField(ArrowSchema schema, FFIDictionaryProvider provider) { private Field importField(ArrowSchema schema, FFIDictionaryProvider provider, int recursionLevel) { - checkState(recursionLevel < MAX_IMPORT_RECURSION_LEVEL, "Recursion level in ArrowSchema struct exceeded"); + checkState(recursionLevel <= MAX_IMPORT_RECURSION_LEVEL, "Recursion level in ArrowSchema struct exceeded"); ArrowSchema.Snapshot snapshot = schema.snapshot(); checkState(snapshot.release != NULL, "Cannot import released ArrowSchema"); From d98d5ae69af1606b615fad157b09e3f453048c57 Mon Sep 17 00:00:00 2001 From: roee88 Date: Thu, 9 Sep 2021 11:57:45 +0300 Subject: [PATCH 06/21] code style fixes Signed-off-by: roee88 --- java/ffi/README.md | 8 +- java/ffi/src/main/cpp/abi.h | 2 +- java/ffi/src/main/cpp/jni_wrapper.cc | 264 +++++++++++++-------------- 3 files changed, 134 insertions(+), 140 deletions(-) diff --git a/java/ffi/README.md b/java/ffi/README.md index b3c6b93ffed..3302e128ee7 100644 --- 
a/java/ffi/README.md +++ b/java/ffi/README.md @@ -42,7 +42,13 @@ To use ninja, pass `-GNinja` when calling cmake and then use the `ninja` command ## Building and running tests -To build Apache Arrow (Java) with this module enabled run the following from the project root directory: +Run tests with + +``` +mvn test +``` + +To install Apache Arrow (Java) with this module enabled run the following from the project root directory: ``` cd java diff --git a/java/ffi/src/main/cpp/abi.h b/java/ffi/src/main/cpp/abi.h index ea7179c725a..a78170dbdbc 100644 --- a/java/ffi/src/main/cpp/abi.h +++ b/java/ffi/src/main/cpp/abi.h @@ -100,4 +100,4 @@ struct ArrowArrayStream { #ifdef __cplusplus } -#endif \ No newline at end of file +#endif diff --git a/java/ffi/src/main/cpp/jni_wrapper.cc b/java/ffi/src/main/cpp/jni_wrapper.cc index f5559ca6708..c4a29b0e93e 100644 --- a/java/ffi/src/main/cpp/jni_wrapper.cc +++ b/java/ffi/src/main/cpp/jni_wrapper.cc @@ -22,158 +22,148 @@ #include #include -#include "abi.h" +#include "./abi.h" #include "org_apache_arrow_ffi_jni_JniWrapper.h" -namespace -{ +namespace { - jclass CreateGlobalClassReference(JNIEnv *env, const char *class_name) - { - jclass local_class = env->FindClass(class_name); - jclass global_class = (jclass)env->NewGlobalRef(local_class); - env->DeleteLocalRef(local_class); - return global_class; - } +jclass CreateGlobalClassReference(JNIEnv* env, const char* class_name) { + jclass local_class = env->FindClass(class_name); + jclass global_class = (jclass)env->NewGlobalRef(local_class); + env->DeleteLocalRef(local_class); + return global_class; +} - jclass illegal_access_exception_class; - jclass illegal_argument_exception_class; - jclass runtime_exception_class; - jclass private_data_class; +jclass illegal_access_exception_class; +jclass illegal_argument_exception_class; +jclass runtime_exception_class; +jclass private_data_class; - jmethodID private_data_close_method; +jmethodID private_data_close_method; - jint JNI_VERSION = JNI_VERSION_1_6; +jint JNI_VERSION = JNI_VERSION_1_6; - class JniPendingException : public std::runtime_error - { - public: - explicit JniPendingException(const std::string &arg) : std::runtime_error(arg) {} - }; +class JniPendingException : public std::runtime_error { + public: + explicit JniPendingException(const std::string& arg) : std::runtime_error(arg) {} +}; - void ThrowPendingException(const std::string &message) - { - throw JniPendingException(message); - } +void ThrowPendingException(const std::string& message) { + throw JniPendingException(message); +} - void JniThrow(std::string message) { ThrowPendingException(message); } +void JniThrow(std::string message) { ThrowPendingException(message); } - jmethodID GetMethodID(JNIEnv* env, jclass this_class, const char* name, - const char* sig) { - jmethodID ret = env->GetMethodID(this_class, name, sig); - if (ret == nullptr) { - std::string error_message = "Unable to find method " + std::string(name) + - " within signature " + std::string(sig); - ThrowPendingException(error_message); +jmethodID GetMethodID(JNIEnv* env, jclass this_class, const char* name, const char* sig) { + jmethodID ret = env->GetMethodID(this_class, name, sig); + if (ret == nullptr) { + std::string error_message = "Unable to find method " + std::string(name) + + " within signature " + std::string(sig); + ThrowPendingException(error_message); + } + return ret; +} + +class InnerPrivateData { + public: + InnerPrivateData(JavaVM* vm, jobject private_data) + : vm_(vm), j_private_data_(private_data) {} + + JavaVM* vm_; 
+ jobject j_private_data_; +}; + +class JNIEnvGuard { + public: + explicit JNIEnvGuard(JavaVM* vm) : vm_(vm), should_deattach_(false) { + JNIEnv* env; + jint code = vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); + if (code == JNI_EDETACHED) { + JavaVMAttachArgs args; + args.version = JNI_VERSION; + args.name = NULL; + args.group = NULL; + code = vm->AttachCurrentThread(reinterpret_cast(&env), &args); + should_deattach_ = (code == JNI_OK); } - return ret; + if (code != JNI_OK) { + ThrowPendingException("Failed to attach the current thread to a Java VM"); + } + env_ = env; } - class InnerPrivateData - { - public: - InnerPrivateData(JavaVM* vm, jobject private_data) - : vm_(vm), j_private_data_(private_data) {} - - JavaVM* vm_; - jobject j_private_data_; - }; - - class JNIEnvGuard - { - public: - explicit JNIEnvGuard(JavaVM* vm) : vm_(vm), should_deattach_(false) { - JNIEnv* env; - jint code = vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); - if (code == JNI_EDETACHED) { - JavaVMAttachArgs args; - args.version = JNI_VERSION; - args.name = NULL; - args.group = NULL; - code = vm->AttachCurrentThread(reinterpret_cast(&env), &args); - should_deattach_ = (code == JNI_OK); - } - if (code != JNI_OK) { - ThrowPendingException("Failed to attach the current thread to a Java VM"); - } - env_ = env; - } - - JNIEnv* env() { return env_; } - - ~JNIEnvGuard() { - if (should_deattach_) { - vm_->DetachCurrentThread(); - should_deattach_ = false; - } - } - private: - bool should_deattach_; - JavaVM* vm_; - JNIEnv* env_; - }; - - template - void release_exported(T* base) { - // This should not be called on already released structure - assert(base->release != nullptr); - - // Release children - for (int64_t i = 0; i < base->n_children; ++i) { - T* child = base->children[i]; - if (child->release != nullptr) { - child->release(child); - assert(child->release == nullptr); - } + JNIEnv* env() { return env_; } + + ~JNIEnvGuard() { + if (should_deattach_) { + vm_->DetachCurrentThread(); + should_deattach_ = false; } + } - // Release dictionary - T* dict = base->dictionary; - if (dict != nullptr && dict->release != nullptr) { - dict->release(dict); - assert(dict->release == nullptr); + private: + bool should_deattach_; + JavaVM* vm_; + JNIEnv* env_; +}; + +template +void release_exported(T* base) { + // This should not be called on already released structure + assert(base->release != nullptr); + + // Release children + for (int64_t i = 0; i < base->n_children; ++i) { + T* child = base->children[i]; + if (child->release != nullptr) { + child->release(child); + assert(child->release == nullptr); } + } - // Release all data directly owned by the struct - InnerPrivateData* private_data = reinterpret_cast(base->private_data); + // Release dictionary + T* dict = base->dictionary; + if (dict != nullptr && dict->release != nullptr) { + dict->release(dict); + assert(dict->release == nullptr); + } - JNIEnvGuard guard(private_data->vm_); - JNIEnv* env = guard.env(); + // Release all data directly owned by the struct + InnerPrivateData* private_data = + reinterpret_cast(base->private_data); - env->CallObjectMethod(private_data->j_private_data_, private_data_close_method); - if (env->ExceptionCheck()) { - env->ExceptionDescribe(); - env->ExceptionClear(); - ThrowPendingException("Error calling close of private data"); - } - env->DeleteGlobalRef(private_data->j_private_data_); - delete private_data; - base->private_data = nullptr; - - // Mark released - base->release = nullptr; + JNIEnvGuard guard(private_data->vm_); + JNIEnv* 
env = guard.env(); + + env->CallObjectMethod(private_data->j_private_data_, private_data_close_method); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + env->ExceptionClear(); + ThrowPendingException("Error calling close of private data"); } -} // namespace + env->DeleteGlobalRef(private_data->j_private_data_); + delete private_data; + base->private_data = nullptr; -#define JNI_METHOD_START \ - try \ - { + // Mark released + base->release = nullptr; +} +} // namespace + +#define JNI_METHOD_START try { // macro ended #define JNI_METHOD_END(fallback_expr) \ } \ - catch (JniPendingException & e) \ - { \ + catch (JniPendingException & e) { \ env->ThrowNew(runtime_exception_class, e.what()); \ return fallback_expr; \ } // macro ended -jint JNI_OnLoad(JavaVM *vm, void *reserved) -{ - JNIEnv *env; - if (vm->GetEnv(reinterpret_cast(&env), JNI_VERSION) != JNI_OK) - { +jint JNI_OnLoad(JavaVM* vm, void* reserved) { + JNIEnv* env; + if (vm->GetEnv(reinterpret_cast(&env), JNI_VERSION) != JNI_OK) { return JNI_ERR; } JNI_METHOD_START @@ -183,20 +173,18 @@ jint JNI_OnLoad(JavaVM *vm, void *reserved) CreateGlobalClassReference(env, "Ljava/lang/IllegalArgumentException;"); runtime_exception_class = CreateGlobalClassReference(env, "Ljava/lang/RuntimeException;"); - private_data_class = + private_data_class = CreateGlobalClassReference(env, "Lorg/apache/arrow/ffi/jni/PrivateData;"); - - private_data_close_method = - GetMethodID(env, private_data_class, "close", "()V"); + + private_data_close_method = GetMethodID(env, private_data_class, "close", "()V"); return JNI_VERSION; JNI_METHOD_END(JNI_ERR) } -void JNI_OnUnload(JavaVM *vm, void *reserved) -{ - JNIEnv *env; - vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); +void JNI_OnUnload(JavaVM* vm, void* reserved) { + JNIEnv* env; + vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); env->DeleteGlobalRef(illegal_access_exception_class); env->DeleteGlobalRef(illegal_argument_exception_class); env->DeleteGlobalRef(runtime_exception_class); @@ -208,12 +196,12 @@ void JNI_OnUnload(JavaVM *vm, void *reserved) * Signature: (J)V */ JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_releaseSchema( - JNIEnv *env, jobject, jlong address) { + JNIEnv* env, jobject, jlong address) { JNI_METHOD_START ArrowSchema* schema = reinterpret_cast(address); - if(schema->release != nullptr) { + if (schema->release != nullptr) { schema->release(schema); - } + } JNI_METHOD_END() } @@ -223,10 +211,10 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_releaseSchema( * Signature: (J)V */ JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_releaseArray( - JNIEnv *env, jobject, jlong address) { + JNIEnv* env, jobject, jlong address) { JNI_METHOD_START ArrowArray* array = reinterpret_cast(address); - if(array->release != nullptr) { + if (array->release != nullptr) { array->release(array); } JNI_METHOD_END() @@ -238,7 +226,7 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_releaseArray( * Signature: (JLorg/apache/arrow/ffi/jni/PrivateData;)V */ JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_exportSchema( - JNIEnv *env, jobject, jlong address, jobject private_data) { + JNIEnv* env, jobject, jlong address, jobject private_data) { JNI_METHOD_START ArrowSchema* schema = reinterpret_cast(address); @@ -249,7 +237,7 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_exportSchema( jobject private_data_ref = env->NewGlobalRef(private_data); schema->private_data = new InnerPrivateData(vm, private_data_ref); 
- schema->release = &release_exported; + schema->release = &release_exported; JNI_METHOD_END() } @@ -259,7 +247,7 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_exportSchema( * Signature: (JLorg/apache/arrow/ffi/jni/PrivateData;)V */ JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_exportArray( - JNIEnv *env, jobject, jlong address, jobject private_data) { + JNIEnv* env, jobject, jlong address, jobject private_data) { JNI_METHOD_START ArrowArray* array = reinterpret_cast(address); From a5b7fcfdaae33b3d82ca0ab55e2aa31fb23b3f0d Mon Sep 17 00:00:00 2001 From: roee88 Date: Thu, 9 Sep 2021 12:18:32 +0300 Subject: [PATCH 07/21] Added missing dependency for slf4j-api Signed-off-by: roee88 --- java/ffi/pom.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/java/ffi/pom.xml b/java/ffi/pom.xml index 1b021559ad3..b9c98a88f57 100644 --- a/java/ffi/pom.xml +++ b/java/ffi/pom.xml @@ -48,6 +48,10 @@ ${project.version} compile + + org.slf4j + slf4j-api + org.apache.arrow arrow-memory-unsafe From e7c74a9edc599c7d89aaa2f71cd7d70d95eab86b Mon Sep 17 00:00:00 2001 From: roee88 Date: Fri, 10 Sep 2021 09:16:42 +0300 Subject: [PATCH 08/21] Added NativeUtil tests Changed toJavaArray to return empty array instead of null when needed. Signed-off-by: roee88 --- .../org/apache/arrow/ffi/ArrayImporter.java | 4 +- .../java/org/apache/arrow/ffi/NativeUtil.java | 2 +- .../org/apache/arrow/ffi/SchemaImporter.java | 2 +- .../org/apache/arrow/ffi/NativeUtilTest.java | 81 +++++++++++++++++++ 4 files changed, 85 insertions(+), 4 deletions(-) create mode 100644 java/ffi/src/test/java/org/apache/arrow/ffi/NativeUtilTest.java diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java index 6871a5e7c0b..fb6f97d9111 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java @@ -92,7 +92,7 @@ private void importChild(ArrayImporter parent, ArrowArray src) { private void doImport(ArrowArray.Snapshot snapshot) { // First import children (required for reconstituting parent array data) long[] children = NativeUtil.toJavaArray(snapshot.children, checkedCastToInt(snapshot.n_children)); - if (children != null) { + if (children != null && children.length > 0) { List childVectors = vector.getChildrenFromFields(); checkState(children.length == childVectors.size(), "ArrowArray struct has %s children (expected %s)", children.length, childVectors.size()); @@ -131,7 +131,7 @@ private void doImport(ArrowArray.Snapshot snapshot) { private List importBuffers(ArrowArray.Snapshot snapshot) { long[] buffers = NativeUtil.toJavaArray(snapshot.buffers, checkedCastToInt(snapshot.n_buffers)); - if (buffers == null) { + if (buffers == null || buffers.length == 0) { return new ArrayList<>(); } diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/NativeUtil.java b/java/ffi/src/main/java/org/apache/arrow/ffi/NativeUtil.java index 51e097dafb5..daa66829f67 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/NativeUtil.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/NativeUtil.java @@ -64,7 +64,7 @@ public static String toJavaString(long cstringPtr) { * @return Array of pointer values as longs */ public static long[] toJavaArray(long arrayPtr, int size) { - if (size == 0 || arrayPtr == NULL) { + if (arrayPtr == NULL) { return null; } if (size < 0) { diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java 
b/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java index 49ac82ed00c..dc1326aab24 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java @@ -98,7 +98,7 @@ private Field importField(ArrowSchema schema, FFIDictionaryProvider provider, List children = null; long[] childrenIds = NativeUtil.toJavaArray(snapshot.children, checkedCastToInt(snapshot.n_children)); - if (childrenIds != null) { + if (childrenIds != null && childrenIds.length > 0) { children = new ArrayList<>(childrenIds.length); for (long childAddress : childrenIds) { ArrowSchema childSchema = ArrowSchema.wrap(childAddress); diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/NativeUtilTest.java b/java/ffi/src/test/java/org/apache/arrow/ffi/NativeUtilTest.java new file mode 100644 index 00000000000..931e4fce269 --- /dev/null +++ b/java/ffi/src/test/java/org/apache/arrow/ffi/NativeUtilTest.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.ffi; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.memory.util.LargeMemoryUtil; +import org.apache.arrow.memory.util.MemoryUtil; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class NativeUtilTest { + + private RootAllocator allocator = null; + + @BeforeEach + public void setUp() { + allocator = new RootAllocator(Long.MAX_VALUE); + } + + @AfterEach + public void tearDown() { + allocator.close(); + } + + @Test + public void testString() { + String javaString = "abc"; + byte[] nativeString = new byte[] { 97, 98, 99, 0 }; + try (ArrowBuf buffer = NativeUtil.toNativeString(allocator, javaString)) { + int totalSize = LargeMemoryUtil.checkedCastToInt(buffer.readableBytes()); + ByteBuffer reader = MemoryUtil.directBuffer(buffer.memoryAddress(), totalSize).order(ByteOrder.nativeOrder()); + byte[] result = new byte[totalSize]; + reader.get(result); + assertArrayEquals(nativeString, result); + + assertEquals(javaString, NativeUtil.toJavaString(buffer.memoryAddress())); + } + } + + @Test + public void testToJavaArray() { + long[] nativeArray = new long[] {1, 2, 3}; + try (ArrowBuf buffer = allocator.buffer(Long.BYTES * nativeArray.length, null)) { + for (long value : nativeArray) { + buffer.writeLong(value); + } + long[] actual = NativeUtil.toJavaArray(buffer.memoryAddress(), nativeArray.length); + assertArrayEquals(nativeArray, actual); + } + } + + @Test + public void testToZeroJavaArray() { + long[] actual = NativeUtil.toJavaArray(0xDEADBEEF, 0); + assertEquals(0, actual.length); + } + +} From a976dbb875ea6947fb78c71b323abefb89ddb669 Mon Sep 17 00:00:00 2001 From: roee88 Date: Fri, 10 Sep 2021 09:23:57 +0300 Subject: [PATCH 09/21] fix: FFIReferenceManager ABA issue Signed-off-by: roee88 --- .../main/java/org/apache/arrow/ffi/ArrayImporter.java | 8 +++----- .../java/org/apache/arrow/ffi/FFIReferenceManager.java | 10 +++++++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java index fb6f97d9111..4e52deba302 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java @@ -27,7 +27,6 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReferenceManager; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.TypeLayout; import org.apache.arrow.vector.dictionary.Dictionary; @@ -45,7 +44,7 @@ final class ArrayImporter { private final FieldVector vector; private final DictionaryProvider dictionaryProvider; - private ReferenceManager referenceManager; + private FFIReferenceManager referenceManager; private int recursionLevel; ArrayImporter(BufferAllocator allocator, FieldVector vector, DictionaryProvider dictionaryProvider) { @@ -69,12 +68,11 @@ void importArray(ArrowArray src) { // This keeps the array alive as long as there are any buffers that need it referenceManager = new FFIReferenceManager(ownedArray); try { - referenceManager.retain(); + referenceManager.increment(); doImport(snapshot); } finally { 
referenceManager.release(); } - } private void importChild(ArrayImporter parent, ArrowArray src) { @@ -110,7 +108,7 @@ private void doImport(ArrowArray.Snapshot snapshot) { Dictionary dictionary = dictionaryProvider.lookup(encoding.getId()); checkNotNull(dictionary, "Dictionary lookup failed on import of ArrowArray with dictionary"); - + // reset the dictionary vector to the initial state dictionary.getVector().clear(); diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java b/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java index 858e31eb721..38bded3fd6f 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java @@ -50,6 +50,13 @@ public boolean release() { return release(1); } + /** + * Increment the reference count without any safety checks. + */ + void increment() { + bufRefCnt.incrementAndGet(); + } + @Override public boolean release(int decrement) { Preconditions.checkState(decrement >= 1, "ref count decrement should be greater than or equal to 1"); @@ -76,7 +83,8 @@ public void retain() { @Override public void retain(int increment) { Preconditions.checkArgument(increment > 0, "retain(%s) argument is not positive", increment); - bufRefCnt.addAndGet(increment); + final int originalReferenceCount = bufRefCnt.getAndAdd(increment); + Preconditions.checkArgument(originalReferenceCount > 0); } @Override From dbab35ea946cfb349008fbf59692964deb56127a Mon Sep 17 00:00:00 2001 From: roee88 Date: Tue, 14 Sep 2021 09:49:37 +0300 Subject: [PATCH 10/21] Removed redundant synchronized Signed-off-by: roee88 --- .../main/java/org/apache/arrow/ffi/FFIReferenceManager.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java b/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java index 38bded3fd6f..b90f665ae7e 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java @@ -67,10 +67,8 @@ public boolean release(int decrement) { if (refCnt == 0) { // refcount of this reference manager has dropped to 0 // release the underlying memory - synchronized (this) { - struct.release(); - struct.close(); - } + struct.release(); + struct.close(); } return refCnt == 0; } From c23e1c47d2005ccd3086e383937d1401d8eacbaa Mon Sep 17 00:00:00 2001 From: roee88 Date: Tue, 14 Sep 2021 09:50:01 +0300 Subject: [PATCH 11/21] Added comment about lack of support for 32bit systems Signed-off-by: roee88 --- java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java b/java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java index 8e835b8dd6d..32c77434184 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java @@ -35,7 +35,9 @@ /** * Functions for working with the C data interface. - * This API is EXPERIMENTAL. + *

+ * This API is EXPERIMENTAL. Note that currently only 64bit systems are + * supported. */ public final class FFI { From 97db8eaa99eb5c93acbacadbfdf11f09831d0491 Mon Sep 17 00:00:00 2001 From: roee88 Date: Wed, 29 Sep 2021 15:55:35 +0300 Subject: [PATCH 12/21] fix: StructVector with inner complex type Signed-off-by: roee88 --- .../src/main/java/org/apache/arrow/ffi/ArrayImporter.java | 2 +- java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java | 4 +--- .../java/org/apache/arrow/vector/StructVectorLoader.java | 6 +++--- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java index 4e52deba302..b671e60bc28 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java @@ -134,7 +134,7 @@ private List importBuffers(ArrowArray.Snapshot snapshot) { } int buffersCount = TypeLayout.getTypeBufferCount(vector.getField().getType()); - checkState(buffers.length == buffersCount, "Expected %d buffers for imported type %s, ArrowArray struct has %d", + checkState(buffers.length == buffersCount, "Expected %s buffers for imported type %s, ArrowArray struct has %s", buffersCount, vector.getField().getType().getTypeID(), buffers.length); List result = new ArrayList<>(buffersCount); diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java b/java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java index 32c77434184..1a16acd802d 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java @@ -260,9 +260,7 @@ public static FieldVector importVector(BufferAllocator allocator, ArrowArray arr public static void importIntoVectorSchemaRoot(BufferAllocator allocator, ArrowArray array, VectorSchemaRoot root, DictionaryProvider provider) { try (StructVector structVector = StructVector.empty("", allocator)) { - for (Field field : root.getSchema().getFields()) { - structVector.addOrGet(field.getName(), field.getFieldType(), FieldVector.class); - } + structVector.initializeChildrenFromFields(root.getSchema().getFields()); importIntoVector(allocator, array, structVector, provider); StructVectorUnloader unloader = new StructVectorUnloader(structVector); VectorLoader loader = new VectorLoader(root); diff --git a/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorLoader.java b/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorLoader.java index e92d2246c6a..114ee54bcd5 100644 --- a/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorLoader.java +++ b/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorLoader.java @@ -80,6 +80,7 @@ public StructVectorLoader(Schema schema, CompressionCodec.Factory factory) { */ public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch) { StructVector result = StructVector.empty("", allocator); + result.initializeChildrenFromFields(this.schema.getFields()); Iterator buffers = recordBatch.getBuffers().iterator(); Iterator nodes = recordBatch.getNodes().iterator(); @@ -87,9 +88,8 @@ public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch .fromCompressionType(recordBatch.getBodyCompression().getCodec()); decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; CompressionCodec codec = decompressionNeeded ? 
factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; - for (Field field : this.schema.getFields()) { - FieldVector fieldVector = result.addOrGet(field.getName(), field.getFieldType(), FieldVector.class); - loadBuffers(fieldVector, field, buffers, nodes, codec); + for (FieldVector fieldVector : result.getChildrenFromFields()) { + loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec); } result.loadFieldBuffers(new ArrowFieldNode(recordBatch.getLength(), 0), Collections.singletonList(null)); if (nodes.hasNext() || buffers.hasNext()) { From a30812847654aee5a21e7bd0cec7768c9701c0d6 Mon Sep 17 00:00:00 2001 From: roee88 Date: Sun, 3 Oct 2021 08:34:35 +0300 Subject: [PATCH 13/21] Improve retain after release check Signed-off-by: roee88 --- .../main/java/org/apache/arrow/ffi/FFIReferenceManager.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java b/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java index b90f665ae7e..95b4e427703 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java +++ b/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java @@ -63,7 +63,7 @@ public boolean release(int decrement) { // decrement the ref count final int refCnt = bufRefCnt.addAndGet(-decrement); // the new ref count should be >= 0 - Preconditions.checkState(refCnt >= 0, "RefCnt has gone negative"); + Preconditions.checkState(refCnt >= 0, "ref count has gone negative"); if (refCnt == 0) { // refcount of this reference manager has dropped to 0 // release the underlying memory @@ -82,7 +82,7 @@ public void retain() { public void retain(int increment) { Preconditions.checkArgument(increment > 0, "retain(%s) argument is not positive", increment); final int originalReferenceCount = bufRefCnt.getAndAdd(increment); - Preconditions.checkArgument(originalReferenceCount > 0); + Preconditions.checkState(originalReferenceCount > 0, "retain called but memory was already released"); } @Override From dee4fbdb1d1a01d2d2c834c49e8b949e8a7f2c72 Mon Sep 17 00:00:00 2001 From: Roee Shlomo Date: Mon, 11 Oct 2021 08:49:46 +0300 Subject: [PATCH 14/21] Java c data interface CI testing packaging * Add testing of the ffi module to the JNI tests * Add packaging of ffi module to java jars packaging Signed-off-by: roee88 Co-authored-by: Doron Chen --- ci/docker/linux-apt-jni.dockerfile | 1 + ci/scripts/java_build.sh | 5 +++ ci/scripts/java_ffi_build.sh | 45 +++++++++++++++++++ .../{java_jni_build.sh => java_full_build.sh} | 9 ++-- ci/scripts/java_test.sh | 5 +++ dev/tasks/java-jars/github.yml | 12 ++++- docker-compose.yml | 4 +- java/ffi/CMakeLists.txt | 4 +- java/ffi/pom.xml | 2 +- 9 files changed, 77 insertions(+), 10 deletions(-) create mode 100755 ci/scripts/java_ffi_build.sh rename ci/scripts/{java_jni_build.sh => java_full_build.sh} (79%) diff --git a/ci/docker/linux-apt-jni.dockerfile b/ci/docker/linux-apt-jni.dockerfile index 1abbf05af3b..97f82ea0b17 100644 --- a/ci/docker/linux-apt-jni.dockerfile +++ b/ci/docker/linux-apt-jni.dockerfile @@ -79,6 +79,7 @@ ENV ARROW_BUILD_TESTS=OFF \ ARROW_PLASMA_JAVA_CLIENT=ON \ ARROW_PLASMA=ON \ ARROW_USE_CCACHE=ON \ + ARROW_JAVA_FFI=ON \ CC=gcc \ CXX=g++ \ ORC_SOURCE=BUNDLED \ diff --git a/ci/scripts/java_build.sh b/ci/scripts/java_build.sh index 5ef150fd1e7..118e07c793e 100755 --- a/ci/scripts/java_build.sh +++ b/ci/scripts/java_build.sh @@ -21,6 +21,7 @@ set -ex arrow_dir=${1} source_dir=${1}/java 
cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug} +ffi_build_dir=${2}/java/ffi with_docs=${3:-false} if [[ "$(uname -s)" == "Linux" ]] && [[ "$(uname -m)" == "s390x" ]]; then @@ -84,6 +85,10 @@ if [ "${ARROW_JAVA_SHADE_FLATBUFFERS}" == "ON" ]; then ${mvn} -Pshade-flatbuffers install fi +if [ "${ARROW_JAVA_FFI}" = "ON" ]; then + ${mvn} -Darrow.ffi.cpp.build.dir=${ffi_build_dir} -Parrow-ffi install +fi + if [ "${ARROW_GANDIVA_JAVA}" = "ON" ]; then ${mvn} -Darrow.cpp.build.dir=${cpp_build_dir} -Parrow-jni install fi diff --git a/ci/scripts/java_ffi_build.sh b/ci/scripts/java_ffi_build.sh new file mode 100755 index 00000000000..81b12fdca04 --- /dev/null +++ b/ci/scripts/java_ffi_build.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +arrow_dir=${1} +build_dir=${2} +# The directory where the final binaries will be stored when scripts finish +dist_dir=${3} + +echo "=== Clear output directories and leftovers ===" +# Clear output directories and leftovers +rm -rf ${build_dir} + +echo "=== Building Arrow Java C Data Interface native library ===" +mkdir -p "${build_dir}" +pushd "${build_dir}" + +cmake \ + -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-release} \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DCMAKE_INSTALL_PREFIX=${build_dir} \ + ${arrow_dir}/java/ffi +cmake --build . --target install --config ${ARROW_BUILD_TYPE:-release} +popd + +echo "=== Copying libraries to the distribution folder ===" +mkdir -p "${dist_dir}" +cp -L ${build_dir}/lib/*arrow_ffi_jni.* ${dist_dir} diff --git a/ci/scripts/java_jni_build.sh b/ci/scripts/java_full_build.sh similarity index 79% rename from ci/scripts/java_jni_build.sh rename to ci/scripts/java_full_build.sh index de6e852b71f..3d348f0cba1 100755 --- a/ci/scripts/java_jni_build.sh +++ b/ci/scripts/java_full_build.sh @@ -20,18 +20,17 @@ set -e arrow_dir=${1} -cpp_lib_dir=${2} -java_dist_dir=${3} +dist_dir=${3} export ARROW_TEST_DATA=${arrow_dir}/testing/data pushd ${arrow_dir}/java # build the entire project -mvn clean install -P arrow-jni -Darrow.cpp.build.dir=$cpp_lib_dir +mvn clean install -Parrow-ffi -Parrow-jni -Darrow.cpp.build.dir=$dist_dir -Darrow.ffi.cpp.build.dir=$dist_dir # copy all jars and pom files to the distribution folder -find . -name "*.jar" -exec echo {} \; -exec cp {} $java_dist_dir \; -find . -name "*.pom" -exec echo {} \; -exec cp {} $java_dist_dir \; +find . -name "*.jar" -exec echo {} \; -exec cp {} $dist_dir \; +find . 
-name "*.pom" -exec echo {} \; -exec cp {} $dist_dir \; popd diff --git a/ci/scripts/java_test.sh b/ci/scripts/java_test.sh index da9e45280ec..f9fdd888799 100755 --- a/ci/scripts/java_test.sh +++ b/ci/scripts/java_test.sh @@ -21,6 +21,7 @@ set -ex arrow_dir=${1} source_dir=${1}/java cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug} +ffi_build_dir=${2}/java/ffi # For JNI and Plasma tests export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} @@ -38,6 +39,10 @@ if [ "${ARROW_JNI}" = "ON" ]; then ${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${cpp_build_dir} fi +if [ "${ARROW_JAVA_FFI}" = "ON" ]; then + ${mvn} test -Parrow-ffi -pl ffi -Darrow.ffi.cpp.build.dir=${ffi_build_dir} +fi + if [ "${ARROW_PLASMA}" = "ON" ]; then pushd ${source_dir}/plasma java -cp target/test-classes:target/classes \ diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index e2372c56eb0..ef5a221b0e1 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -51,6 +51,13 @@ jobs: steps: {{ macros.github_checkout_arrow()|indent }} {{ macros.github_install_archery()|indent }} + - name: Build C Data Interface lib + run: | + set -e + arrow/ci/scripts/java_ffi_build.sh \ + $GITHUB_WORKSPACE/arrow \ + $GITHUB_WORKSPACE/arrow/java-native-build \ + $GITHUB_WORKSPACE/arrow/java/dist - name: Build C++ Libs run: | set -e @@ -86,17 +93,18 @@ jobs: tar -xvzf arrow-shared-libs-linux.tar.gz - name: Test that Shared Libraries Exist run: | + test -f arrow/java/dist/libarrow_ffi_jni.dylib test -f arrow/java/dist/libarrow_dataset_jni.dylib test -f arrow/java/dist/libgandiva_jni.dylib test -f arrow/java/dist/libarrow_orc_jni.dylib + test -f arrow/java/dist/libarrow_ffi_jni.so test -f arrow/java/dist/libarrow_dataset_jni.so test -f arrow/java/dist/libarrow_orc_jni.so test -f arrow/java/dist/libgandiva_jni.so - name: Build Bundled Jar run: | set -e - arrow/ci/scripts/java_jni_build.sh \ + arrow/ci/scripts/java_full_build.sh \ $GITHUB_WORKSPACE/arrow \ - $GITHUB_WORKSPACE/arrow/java/dist \ $GITHUB_WORKSPACE/arrow/java/dist {{ macros.github_upload_releases(["arrow/java/dist/*.jar", "arrow/java/dist/*.pom"])|indent }} diff --git a/docker-compose.yml b/docker-compose.yml index 63c1ee6aac4..73fc70ac48f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -875,6 +875,7 @@ services: - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated command: ["pip install -e /arrow/dev/archery && + /arrow/ci/scripts/java_ffi_build.sh /arrow /java-native-build /arrow/java/dist && /arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java/dist"] ############################## Integration ################################# @@ -1308,7 +1309,7 @@ services: /arrow/ci/scripts/java_test.sh /arrow /build" debian-java-jni: - # Includes plasma test and jni for gandiva and orc. + # Includes plasma test, jni for gandiva and orc, and C data interface. 
# Usage: # docker-compose build debian-java # docker-compose build debian-java-jni @@ -1331,6 +1332,7 @@ services: - ${DOCKER_VOLUME_PREFIX}debian-ccache:/ccache:delegated command: /bin/bash -c " + /arrow/ci/scripts/java_ffi_build.sh /arrow /build/java/ffi/build /build/java/ffi && /arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/java_build.sh /arrow /build && /arrow/ci/scripts/java_test.sh /arrow /build" diff --git a/java/ffi/CMakeLists.txt b/java/ffi/CMakeLists.txt index b45c82f48b7..a831807ff6b 100644 --- a/java/ffi/CMakeLists.txt +++ b/java/ffi/CMakeLists.txt @@ -19,8 +19,8 @@ # arrow_ffi_java # -# Headers: top level cmake_minimum_required(VERSION 3.11) +message(STATUS "Building using CMake version: ${CMAKE_VERSION}") project(arrow_ffi_java) # Find java/jni @@ -49,3 +49,5 @@ set(SOURCES src/main/cpp/jni_wrapper.cc) add_library(arrow_ffi_jni SHARED ${SOURCES}) target_link_libraries(arrow_ffi_jni ${JAVA_JVM_LIBRARY}) add_dependencies(arrow_ffi_jni ${PROJECT_NAME}) + +install(TARGETS arrow_ffi_jni DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) diff --git a/java/ffi/pom.xml b/java/ffi/pom.xml index b9c98a88f57..77e8ff801f3 100644 --- a/java/ffi/pom.xml +++ b/java/ffi/pom.xml @@ -70,7 +70,7 @@ ${arrow.ffi.cpp.build.dir} - **/libarrow_ffi_jni.* + **/*arrow_ffi_jni.* From 99d7556804108591a31e9ad58d30a7284df8d1c7 Mon Sep 17 00:00:00 2001 From: roee88 Date: Mon, 11 Oct 2021 21:37:40 +0300 Subject: [PATCH 15/21] Code style Signed-off-by: roee88 --- ci/docker/linux-apt-jni.dockerfile | 2 +- java/ffi/src/main/cpp/jni_wrapper.cc | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/docker/linux-apt-jni.dockerfile b/ci/docker/linux-apt-jni.dockerfile index 97f82ea0b17..3d1ea0a9144 100644 --- a/ci/docker/linux-apt-jni.dockerfile +++ b/ci/docker/linux-apt-jni.dockerfile @@ -73,13 +73,13 @@ ENV ARROW_BUILD_TESTS=OFF \ ARROW_GANDIVA_JAVA=ON \ ARROW_GANDIVA=ON \ ARROW_HOME=/usr/local \ + ARROW_JAVA_FFI=ON \ ARROW_JNI=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_PLASMA_JAVA_CLIENT=ON \ ARROW_PLASMA=ON \ ARROW_USE_CCACHE=ON \ - ARROW_JAVA_FFI=ON \ CC=gcc \ CXX=g++ \ ORC_SOURCE=BUNDLED \ diff --git a/java/ffi/src/main/cpp/jni_wrapper.cc b/java/ffi/src/main/cpp/jni_wrapper.cc index c4a29b0e93e..39001b20978 100644 --- a/java/ffi/src/main/cpp/jni_wrapper.cc +++ b/java/ffi/src/main/cpp/jni_wrapper.cc @@ -75,7 +75,7 @@ class InnerPrivateData { class JNIEnvGuard { public: - explicit JNIEnvGuard(JavaVM* vm) : vm_(vm), should_deattach_(false) { + explicit JNIEnvGuard(JavaVM* vm) : vm_(vm), should_detach_(false) { JNIEnv* env; jint code = vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); if (code == JNI_EDETACHED) { @@ -84,7 +84,7 @@ class JNIEnvGuard { args.name = NULL; args.group = NULL; code = vm->AttachCurrentThread(reinterpret_cast(&env), &args); - should_deattach_ = (code == JNI_OK); + should_detach_ = (code == JNI_OK); } if (code != JNI_OK) { ThrowPendingException("Failed to attach the current thread to a Java VM"); @@ -95,14 +95,14 @@ class JNIEnvGuard { JNIEnv* env() { return env_; } ~JNIEnvGuard() { - if (should_deattach_) { + if (should_detach_) { vm_->DetachCurrentThread(); - should_deattach_ = false; + should_detach_ = false; } } private: - bool should_deattach_; + bool should_detach_; JavaVM* vm_; JNIEnv* env_; }; From 0f0ac3c83751db0d84a43024e716291844de77d3 Mon Sep 17 00:00:00 2001 From: roee88 Date: Tue, 12 Oct 2021 08:33:43 +0300 Subject: [PATCH 16/21] Fix java_full_build script Signed-off-by: roee88 --- ci/scripts/java_full_build.sh | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/java_full_build.sh b/ci/scripts/java_full_build.sh index 3d348f0cba1..1cdd3ff22b7 100755 --- a/ci/scripts/java_full_build.sh +++ b/ci/scripts/java_full_build.sh @@ -20,7 +20,7 @@ set -e arrow_dir=${1} -dist_dir=${3} +dist_dir=${2} export ARROW_TEST_DATA=${arrow_dir}/testing/data From 1f43a84c52cc2c7e2a13e2b242db7c76ed24cd72 Mon Sep 17 00:00:00 2001 From: Roee Shlomo Date: Tue, 12 Oct 2021 12:04:07 +0300 Subject: [PATCH 17/21] Rename ffi to c data * Package name: org.apache.arrow.c * Class name: Data * Maven profile name: arrow-c-data * Shared library name: libarrow_cdata_jni * Maven property for dir with the shared library: arrow.c.jni.dist.dir * CI script for building the shared library: java_cdata_build.sh * CI flag to enable building the shared library: ARROW_JAVA_CDATA=ON Signed-off-by: roee88 --- ci/docker/linux-apt-jni.dockerfile | 2 +- ci/scripts/java_build.sh | 6 +-- ...{java_ffi_build.sh => java_cdata_build.sh} | 4 +- ci/scripts/java_full_build.sh | 2 +- ci/scripts/java_test.sh | 6 +-- dev/release/01-prepare-test.rb | 12 +++--- dev/tasks/java-jars/github.yml | 6 +-- docker-compose.yml | 4 +- java/.gitignore | 1 + java/{ffi => c}/.gitignore | 0 java/{ffi => c}/CMakeLists.txt | 20 ++++----- java/{ffi => c}/README.md | 8 ++-- java/{ffi => c}/pom.xml | 16 +++---- java/{ffi => c}/src/main/cpp/abi.h | 0 java/{ffi => c}/src/main/cpp/jni_wrapper.cc | 26 +++++------ .../org/apache/arrow/c}/ArrayExporter.java | 10 ++--- .../org/apache/arrow/c}/ArrayImporter.java | 8 ++-- .../java/org/apache/arrow/c}/ArrowArray.java | 11 ++--- .../java/org/apache/arrow/c}/ArrowSchema.java | 15 ++++--- .../java/org/apache/arrow/c}/BaseStruct.java | 2 +- .../arrow/c/CDataDictionaryProvider.java} | 14 +++--- .../arrow/c/CDataReferenceManager.java} | 15 ++++--- .../main/java/org/apache/arrow/c/Data.java} | 16 +++---- .../main/java/org/apache/arrow/c}/Flags.java | 2 +- .../main/java/org/apache/arrow/c}/Format.java | 2 +- .../java/org/apache/arrow/c}/Metadata.java | 4 +- .../java/org/apache/arrow/c}/NativeUtil.java | 2 +- .../org/apache/arrow/c}/SchemaExporter.java | 10 ++--- .../org/apache/arrow/c}/SchemaImporter.java | 9 ++-- .../org/apache/arrow/c}/jni/JniLoader.java | 4 +- .../org/apache/arrow/c}/jni/JniWrapper.java | 2 +- .../org/apache/arrow/c}/jni/PrivateData.java | 2 +- .../arrow/vector/StructVectorLoader.java | 2 +- .../arrow/vector/StructVectorUnloader.java | 41 ++++++++++-------- .../org/apache/arrow/c}/DictionaryTest.java | 32 ++++++++------ .../java/org/apache/arrow/c}/FlagsTest.java | 3 +- .../java/org/apache/arrow/c}/FormatTest.java | 25 +++++------ .../org/apache/arrow/c}/MetadataTest.java | 4 +- .../org/apache/arrow/c}/NativeUtilTest.java | 11 ++--- .../org/apache/arrow/c}/RoundtripTest.java | 43 ++++++++++--------- java/pom.xml | 6 +-- 41 files changed, 208 insertions(+), 200 deletions(-) rename ci/scripts/{java_ffi_build.sh => java_cdata_build.sh} (95%) rename java/{ffi => c}/.gitignore (100%) rename java/{ffi => c}/CMakeLists.txt (73%) rename java/{ffi => c}/README.md (88%) rename java/{ffi => c}/pom.xml (84%) rename java/{ffi => c}/src/main/cpp/abi.h (100%) rename java/{ffi => c}/src/main/cpp/jni_wrapper.cc (89%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => c/src/main/java/org/apache/arrow/c}/ArrayExporter.java (95%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => c/src/main/java/org/apache/arrow/c}/ArrayImporter.java (96%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => 
c/src/main/java/org/apache/arrow/c}/ArrowArray.java (96%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => c/src/main/java/org/apache/arrow/c}/ArrowSchema.java (94%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => c/src/main/java/org/apache/arrow/c}/BaseStruct.java (97%) rename java/{ffi/src/main/java/org/apache/arrow/ffi/FFIDictionaryProvider.java => c/src/main/java/org/apache/arrow/c/CDataDictionaryProvider.java} (82%) rename java/{ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java => c/src/main/java/org/apache/arrow/c/CDataReferenceManager.java} (91%) rename java/{ffi/src/main/java/org/apache/arrow/ffi/FFI.java => c/src/main/java/org/apache/arrow/c/Data.java} (97%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => c/src/main/java/org/apache/arrow/c}/Flags.java (98%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => c/src/main/java/org/apache/arrow/c}/Format.java (99%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => c/src/main/java/org/apache/arrow/c}/Metadata.java (97%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => c/src/main/java/org/apache/arrow/c}/NativeUtil.java (99%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => c/src/main/java/org/apache/arrow/c}/SchemaExporter.java (95%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => c/src/main/java/org/apache/arrow/c}/SchemaImporter.java (94%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => c/src/main/java/org/apache/arrow/c}/jni/JniLoader.java (97%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => c/src/main/java/org/apache/arrow/c}/jni/JniWrapper.java (97%) rename java/{ffi/src/main/java/org/apache/arrow/ffi => c/src/main/java/org/apache/arrow/c}/jni/PrivateData.java (96%) rename java/{ffi => c}/src/main/java/org/apache/arrow/vector/StructVectorLoader.java (99%) rename java/{ffi => c}/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java (70%) rename java/{ffi/src/test/java/org/apache/arrow/ffi => c/src/test/java/org/apache/arrow/c}/DictionaryTest.java (85%) rename java/{ffi/src/test/java/org/apache/arrow/ffi => c/src/test/java/org/apache/arrow/c}/FlagsTest.java (98%) rename java/{ffi/src/test/java/org/apache/arrow/ffi => c/src/test/java/org/apache/arrow/c}/FormatTest.java (91%) rename java/{ffi/src/test/java/org/apache/arrow/ffi => c/src/test/java/org/apache/arrow/c}/MetadataTest.java (96%) rename java/{ffi/src/test/java/org/apache/arrow/ffi => c/src/test/java/org/apache/arrow/c}/NativeUtilTest.java (93%) rename java/{ffi/src/test/java/org/apache/arrow/ffi => c/src/test/java/org/apache/arrow/c}/RoundtripTest.java (95%) diff --git a/ci/docker/linux-apt-jni.dockerfile b/ci/docker/linux-apt-jni.dockerfile index 3d1ea0a9144..ddfa72e1774 100644 --- a/ci/docker/linux-apt-jni.dockerfile +++ b/ci/docker/linux-apt-jni.dockerfile @@ -73,7 +73,7 @@ ENV ARROW_BUILD_TESTS=OFF \ ARROW_GANDIVA_JAVA=ON \ ARROW_GANDIVA=ON \ ARROW_HOME=/usr/local \ - ARROW_JAVA_FFI=ON \ + ARROW_JAVA_CDATA=ON \ ARROW_JNI=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ diff --git a/ci/scripts/java_build.sh b/ci/scripts/java_build.sh index 118e07c793e..1ba37606d3c 100755 --- a/ci/scripts/java_build.sh +++ b/ci/scripts/java_build.sh @@ -21,7 +21,7 @@ set -ex arrow_dir=${1} source_dir=${1}/java cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug} -ffi_build_dir=${2}/java/ffi +cdata_dist_dir=${2}/java/c with_docs=${3:-false} if [[ "$(uname -s)" == "Linux" ]] && [[ "$(uname -m)" == "s390x" ]]; then @@ -85,8 +85,8 @@ if [ "${ARROW_JAVA_SHADE_FLATBUFFERS}" == "ON" ]; then ${mvn} -Pshade-flatbuffers install 
fi -if [ "${ARROW_JAVA_FFI}" = "ON" ]; then - ${mvn} -Darrow.ffi.cpp.build.dir=${ffi_build_dir} -Parrow-ffi install +if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then + ${mvn} -Darrow.c.jni.dist.dir=${cdata_dist_dir} -Parrow-c-data install fi if [ "${ARROW_GANDIVA_JAVA}" = "ON" ]; then diff --git a/ci/scripts/java_ffi_build.sh b/ci/scripts/java_cdata_build.sh similarity index 95% rename from ci/scripts/java_ffi_build.sh rename to ci/scripts/java_cdata_build.sh index 81b12fdca04..730c775d402 100755 --- a/ci/scripts/java_ffi_build.sh +++ b/ci/scripts/java_cdata_build.sh @@ -36,10 +36,10 @@ cmake \ -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-release} \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_INSTALL_PREFIX=${build_dir} \ - ${arrow_dir}/java/ffi + ${arrow_dir}/java/c cmake --build . --target install --config ${ARROW_BUILD_TYPE:-release} popd echo "=== Copying libraries to the distribution folder ===" mkdir -p "${dist_dir}" -cp -L ${build_dir}/lib/*arrow_ffi_jni.* ${dist_dir} +cp -L ${build_dir}/lib/*arrow_cdata_jni.* ${dist_dir} diff --git a/ci/scripts/java_full_build.sh b/ci/scripts/java_full_build.sh index 1cdd3ff22b7..fb1d2b5f535 100755 --- a/ci/scripts/java_full_build.sh +++ b/ci/scripts/java_full_build.sh @@ -27,7 +27,7 @@ export ARROW_TEST_DATA=${arrow_dir}/testing/data pushd ${arrow_dir}/java # build the entire project -mvn clean install -Parrow-ffi -Parrow-jni -Darrow.cpp.build.dir=$dist_dir -Darrow.ffi.cpp.build.dir=$dist_dir +mvn clean install -Parrow-c-data -Parrow-jni -Darrow.cpp.build.dir=$dist_dir -Darrow.c.jni.dist.dir=$dist_dir # copy all jars and pom files to the distribution folder find . -name "*.jar" -exec echo {} \; -exec cp {} $dist_dir \; diff --git a/ci/scripts/java_test.sh b/ci/scripts/java_test.sh index f9fdd888799..0e755bcafbe 100755 --- a/ci/scripts/java_test.sh +++ b/ci/scripts/java_test.sh @@ -21,7 +21,7 @@ set -ex arrow_dir=${1} source_dir=${1}/java cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug} -ffi_build_dir=${2}/java/ffi +cdata_dist_dir=${2}/java/c # For JNI and Plasma tests export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} @@ -39,8 +39,8 @@ if [ "${ARROW_JNI}" = "ON" ]; then ${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${cpp_build_dir} fi -if [ "${ARROW_JAVA_FFI}" = "ON" ]; then - ${mvn} test -Parrow-ffi -pl ffi -Darrow.ffi.cpp.build.dir=${ffi_build_dir} +if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then + ${mvn} test -Parrow-c-data -pl c -Darrow.c.jni.dist.dir=${cdata_dist_dir} fi if [ "${ARROW_PLASMA}" = "ON" ]; then diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index ec9730bf88c..51665ec02ad 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -207,15 +207,15 @@ def test_version_pre_tag { hunks: [["- #{@snapshot_version}", "+ #{@release_version}"]], path: "java/algorithm/pom.xml" }, + { hunks: [["- #{@snapshot_version}", + "+ #{@release_version}"]], + path: "java/c/pom.xml" }, { hunks: [["- #{@snapshot_version}", "+ #{@release_version}"]], path: "java/compression/pom.xml" }, { hunks: [["- #{@snapshot_version}", "+ #{@release_version}"]], path: "java/dataset/pom.xml" }, - { hunks: [["- #{@snapshot_version}", - "+ #{@release_version}"]], - path: "java/ffi/pom.xml" }, { hunks: [["- #{@snapshot_version}", "+ #{@release_version}"]], path: "java/flight/flight-core/pom.xml" }, @@ -408,15 +408,15 @@ def test_version_post_tag { hunks: [["- #{@snapshot_version}", "+ #{@next_snapshot_version}"]], path: "java/algorithm/pom.xml" }, + { hunks: [["- #{@snapshot_version}", + "+ 
#{@next_snapshot_version}"]], + path: "java/c/pom.xml" }, { hunks: [["- #{@snapshot_version}", "+ #{@next_snapshot_version}"]], path: "java/compression/pom.xml" }, { hunks: [["- #{@snapshot_version}", "+ #{@next_snapshot_version}"]], path: "java/dataset/pom.xml" }, - { hunks: [["- #{@snapshot_version}", - "+ #{@next_snapshot_version}"]], - path: "java/ffi/pom.xml" }, { hunks: [["- #{@snapshot_version}", "+ #{@next_snapshot_version}"]], path: "java/flight/flight-core/pom.xml" }, diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index ef5a221b0e1..641abbfb892 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -54,7 +54,7 @@ jobs: - name: Build C Data Interface lib run: | set -e - arrow/ci/scripts/java_ffi_build.sh \ + arrow/ci/scripts/java_cdata_build.sh \ $GITHUB_WORKSPACE/arrow \ $GITHUB_WORKSPACE/arrow/java-native-build \ $GITHUB_WORKSPACE/arrow/java/dist @@ -93,11 +93,11 @@ jobs: tar -xvzf arrow-shared-libs-linux.tar.gz - name: Test that Shared Libraries Exist run: | - test -f arrow/java/dist/libarrow_ffi_jni.dylib + test -f arrow/java/dist/libarrow_cdata_jni.dylib test -f arrow/java/dist/libarrow_dataset_jni.dylib test -f arrow/java/dist/libgandiva_jni.dylib test -f arrow/java/dist/libarrow_orc_jni.dylib - test -f arrow/java/dist/libarrow_ffi_jni.so + test -f arrow/java/dist/libarrow_cdata_jni.so test -f arrow/java/dist/libarrow_dataset_jni.so test -f arrow/java/dist/libarrow_orc_jni.so test -f arrow/java/dist/libgandiva_jni.so diff --git a/docker-compose.yml b/docker-compose.yml index 73fc70ac48f..660fa99b0fc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -875,7 +875,7 @@ services: - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated command: ["pip install -e /arrow/dev/archery && - /arrow/ci/scripts/java_ffi_build.sh /arrow /java-native-build /arrow/java/dist && + /arrow/ci/scripts/java_cdata_build.sh /arrow /java-native-build /arrow/java/dist && /arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java/dist"] ############################## Integration ################################# @@ -1332,8 +1332,8 @@ services: - ${DOCKER_VOLUME_PREFIX}debian-ccache:/ccache:delegated command: /bin/bash -c " - /arrow/ci/scripts/java_ffi_build.sh /arrow /build/java/ffi/build /build/java/ffi && /arrow/ci/scripts/cpp_build.sh /arrow /build && + /arrow/ci/scripts/java_cdata_build.sh /arrow /build/java/c/build /build/java/c && /arrow/ci/scripts/java_build.sh /arrow /build && /arrow/ci/scripts/java_test.sh /arrow /build" diff --git a/java/.gitignore b/java/.gitignore index bc6ce4f6c32..51b4f602264 100644 --- a/java/.gitignore +++ b/java/.gitignore @@ -22,3 +22,4 @@ Makefile cmake_install.cmake install_manifest.txt ?/ +!c/ diff --git a/java/ffi/.gitignore b/java/c/.gitignore similarity index 100% rename from java/ffi/.gitignore rename to java/c/.gitignore diff --git a/java/ffi/CMakeLists.txt b/java/c/CMakeLists.txt similarity index 73% rename from java/ffi/CMakeLists.txt rename to java/c/CMakeLists.txt index a831807ff6b..1c6b048be12 100644 --- a/java/ffi/CMakeLists.txt +++ b/java/c/CMakeLists.txt @@ -16,12 +16,12 @@ # under the License. 
# -# arrow_ffi_java +# arrow_cdata_java # cmake_minimum_required(VERSION 3.11) message(STATUS "Building using CMake version: ${CMAKE_VERSION}") -project(arrow_ffi_java) +project(arrow_cdata_java) # Find java/jni include(FindJava) @@ -37,17 +37,17 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${JNI_INCLUDE_DIRS} ${JNI_HEADERS_DIR}) add_jar(${PROJECT_NAME} - src/main/java/org/apache/arrow/ffi/jni/JniLoader.java - src/main/java/org/apache/arrow/ffi/jni/JniWrapper.java - src/main/java/org/apache/arrow/ffi/jni/PrivateData.java + src/main/java/org/apache/arrow/c/jni/JniLoader.java + src/main/java/org/apache/arrow/c/jni/JniWrapper.java + src/main/java/org/apache/arrow/c/jni/PrivateData.java GENERATE_NATIVE_HEADERS - arrow_ffi_java-native + arrow_cdata_java-native DESTINATION ${JNI_HEADERS_DIR}) set(SOURCES src/main/cpp/jni_wrapper.cc) -add_library(arrow_ffi_jni SHARED ${SOURCES}) -target_link_libraries(arrow_ffi_jni ${JAVA_JVM_LIBRARY}) -add_dependencies(arrow_ffi_jni ${PROJECT_NAME}) +add_library(arrow_cdata_jni SHARED ${SOURCES}) +target_link_libraries(arrow_cdata_jni ${JAVA_JVM_LIBRARY}) +add_dependencies(arrow_cdata_jni ${PROJECT_NAME}) -install(TARGETS arrow_ffi_jni DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) +install(TARGETS arrow_cdata_jni DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/java/ffi/README.md b/java/c/README.md similarity index 88% rename from java/ffi/README.md rename to java/c/README.md index 3302e128ee7..ce73f531c1b 100644 --- a/java/ffi/README.md +++ b/java/c/README.md @@ -17,7 +17,7 @@ under the License. --> -# Java FFI (C Data Interface) +# C Interfaces for Arrow Java ## Setup Build Environment @@ -34,12 +34,10 @@ install: mkdir -p build pushd build cmake .. -make +cmake --build . popd ``` -To use ninja, pass `-GNinja` when calling cmake and then use the `ninja` command instead of `make`. - ## Building and running tests Run tests with @@ -52,5 +50,5 @@ To install Apache Arrow (Java) with this module enabled run the following from t ``` cd java -mvn -Parrow-ffi install +mvn -Parrow-c-data install ``` diff --git a/java/ffi/pom.xml b/java/c/pom.xml similarity index 84% rename from java/ffi/pom.xml rename to java/c/pom.xml index 77e8ff801f3..c6edbfb296f 100644 --- a/java/ffi/pom.xml +++ b/java/c/pom.xml @@ -9,9 +9,7 @@ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
--> - + arrow-java-root org.apache.arrow @@ -19,12 +17,12 @@ 4.0.0 - arrow-ffi + arrow-c-data Arrow Java C Data Interface Java implementation of C Data Interface jar - ./build + ./build @@ -51,7 +49,7 @@ org.slf4j slf4j-api - + org.apache.arrow arrow-memory-unsafe @@ -68,12 +66,12 @@ - ${arrow.ffi.cpp.build.dir} + ${arrow.c.jni.dist.dir} - **/*arrow_ffi_jni.* + **/*arrow_cdata_jni.* - + \ No newline at end of file diff --git a/java/ffi/src/main/cpp/abi.h b/java/c/src/main/cpp/abi.h similarity index 100% rename from java/ffi/src/main/cpp/abi.h rename to java/c/src/main/cpp/abi.h diff --git a/java/ffi/src/main/cpp/jni_wrapper.cc b/java/c/src/main/cpp/jni_wrapper.cc similarity index 89% rename from java/ffi/src/main/cpp/jni_wrapper.cc rename to java/c/src/main/cpp/jni_wrapper.cc index 39001b20978..cfb0af9bcbb 100644 --- a/java/ffi/src/main/cpp/jni_wrapper.cc +++ b/java/c/src/main/cpp/jni_wrapper.cc @@ -23,7 +23,7 @@ #include #include "./abi.h" -#include "org_apache_arrow_ffi_jni_JniWrapper.h" +#include "org_apache_arrow_c_jni_JniWrapper.h" namespace { @@ -174,7 +174,7 @@ jint JNI_OnLoad(JavaVM* vm, void* reserved) { runtime_exception_class = CreateGlobalClassReference(env, "Ljava/lang/RuntimeException;"); private_data_class = - CreateGlobalClassReference(env, "Lorg/apache/arrow/ffi/jni/PrivateData;"); + CreateGlobalClassReference(env, "Lorg/apache/arrow/c/jni/PrivateData;"); private_data_close_method = GetMethodID(env, private_data_class, "close", "()V"); @@ -191,11 +191,11 @@ void JNI_OnUnload(JavaVM* vm, void* reserved) { } /* - * Class: org_apache_arrow_ffi_jni_JniWrapper + * Class: org_apache_arrow_c_jni_JniWrapper * Method: releaseSchema * Signature: (J)V */ -JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_releaseSchema( +JNIEXPORT void JNICALL Java_org_apache_arrow_c_jni_JniWrapper_releaseSchema( JNIEnv* env, jobject, jlong address) { JNI_METHOD_START ArrowSchema* schema = reinterpret_cast(address); @@ -206,12 +206,12 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_releaseSchema( } /* - * Class: org_apache_arrow_ffi_jni_JniWrapper + * Class: org_apache_arrow_c_jni_JniWrapper * Method: releaseArray * Signature: (J)V */ -JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_releaseArray( - JNIEnv* env, jobject, jlong address) { +JNIEXPORT void JNICALL +Java_org_apache_arrow_c_jni_JniWrapper_releaseArray(JNIEnv* env, jobject, jlong address) { JNI_METHOD_START ArrowArray* array = reinterpret_cast(address); if (array->release != nullptr) { @@ -221,11 +221,11 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_releaseArray( } /* - * Class: org_apache_arrow_ffi_jni_JniWrapper + * Class: org_apache_arrow_c_jni_JniWrapper * Method: exportSchema - * Signature: (JLorg/apache/arrow/ffi/jni/PrivateData;)V + * Signature: (JLorg/apache/arrow/c/jni/PrivateData;)V */ -JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_exportSchema( +JNIEXPORT void JNICALL Java_org_apache_arrow_c_jni_JniWrapper_exportSchema( JNIEnv* env, jobject, jlong address, jobject private_data) { JNI_METHOD_START ArrowSchema* schema = reinterpret_cast(address); @@ -242,11 +242,11 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_ffi_jni_JniWrapper_exportSchema( } /* - * Class: org_apache_arrow_ffi_jni_JniWrapper + * Class: org_apache_arrow_c_jni_JniWrapper * Method: exportArray - * Signature: (JLorg/apache/arrow/ffi/jni/PrivateData;)V + * Signature: (JLorg/apache/arrow/c/jni/PrivateData;)V */ -JNIEXPORT void JNICALL 
Java_org_apache_arrow_ffi_jni_JniWrapper_exportArray( +JNIEXPORT void JNICALL Java_org_apache_arrow_c_jni_JniWrapper_exportArray( JNIEnv* env, jobject, jlong address, jobject private_data) { JNI_METHOD_START ArrowArray* array = reinterpret_cast(address); diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayExporter.java b/java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java similarity index 95% rename from java/ffi/src/main/java/org/apache/arrow/ffi/ArrayExporter.java rename to java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java index 46e037443d0..d6479a3ba4c 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayExporter.java +++ b/java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java @@ -15,17 +15,17 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; -import static org.apache.arrow.ffi.NativeUtil.NULL; -import static org.apache.arrow.ffi.NativeUtil.addressOrNull; +import static org.apache.arrow.c.NativeUtil.NULL; +import static org.apache.arrow.c.NativeUtil.addressOrNull; import static org.apache.arrow.util.Preconditions.checkNotNull; import java.util.ArrayList; import java.util.List; -import org.apache.arrow.ffi.jni.JniWrapper; -import org.apache.arrow.ffi.jni.PrivateData; +import org.apache.arrow.c.jni.JniWrapper; +import org.apache.arrow.c.jni.PrivateData; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.FieldVector; diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java b/java/c/src/main/java/org/apache/arrow/c/ArrayImporter.java similarity index 96% rename from java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java rename to java/c/src/main/java/org/apache/arrow/c/ArrayImporter.java index b671e60bc28..e82cef6a8a6 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrayImporter.java +++ b/java/c/src/main/java/org/apache/arrow/c/ArrayImporter.java @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -package org.apache.arrow.ffi; +package org.apache.arrow.c; -import static org.apache.arrow.ffi.NativeUtil.NULL; +import static org.apache.arrow.c.NativeUtil.NULL; import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; import static org.apache.arrow.util.Preconditions.checkNotNull; import static org.apache.arrow.util.Preconditions.checkState; @@ -44,7 +44,7 @@ final class ArrayImporter { private final FieldVector vector; private final DictionaryProvider dictionaryProvider; - private FFIReferenceManager referenceManager; + private CDataReferenceManager referenceManager; private int recursionLevel; ArrayImporter(BufferAllocator allocator, FieldVector vector, DictionaryProvider dictionaryProvider) { @@ -66,7 +66,7 @@ void importArray(ArrowArray src) { recursionLevel = 0; // This keeps the array alive as long as there are any buffers that need it - referenceManager = new FFIReferenceManager(ownedArray); + referenceManager = new CDataReferenceManager(ownedArray); try { referenceManager.increment(); doImport(snapshot); diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrowArray.java b/java/c/src/main/java/org/apache/arrow/c/ArrowArray.java similarity index 96% rename from java/ffi/src/main/java/org/apache/arrow/ffi/ArrowArray.java rename to java/c/src/main/java/org/apache/arrow/c/ArrowArray.java index 87189827273..99fe0432c14 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrowArray.java +++ b/java/c/src/main/java/org/apache/arrow/c/ArrowArray.java @@ -15,15 +15,15 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; -import static org.apache.arrow.ffi.NativeUtil.NULL; +import static org.apache.arrow.c.NativeUtil.NULL; import static org.apache.arrow.util.Preconditions.checkNotNull; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import org.apache.arrow.ffi.jni.JniWrapper; +import org.apache.arrow.c.jni.JniWrapper; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.ReferenceManager; @@ -75,8 +75,8 @@ public static class Snapshot { public long private_data; /** - * Initialize empty ArrowArray snapshot. - */ + * Initialize empty ArrowArray snapshot. + */ public Snapshot() { length = NULL; null_count = NULL; @@ -155,6 +155,7 @@ private ByteBuffer directBuffer() { /** * Take a snapshot of the ArrowArray raw values. + * * @return snapshot */ public Snapshot snapshot() { diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrowSchema.java b/java/c/src/main/java/org/apache/arrow/c/ArrowSchema.java similarity index 94% rename from java/ffi/src/main/java/org/apache/arrow/ffi/ArrowSchema.java rename to java/c/src/main/java/org/apache/arrow/c/ArrowSchema.java index a9a7d4d506e..b34ce7d5a52 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/ArrowSchema.java +++ b/java/c/src/main/java/org/apache/arrow/c/ArrowSchema.java @@ -15,15 +15,15 @@ * limitations under the License. 
*/ -package org.apache.arrow.ffi; +package org.apache.arrow.c; -import static org.apache.arrow.ffi.NativeUtil.NULL; +import static org.apache.arrow.c.NativeUtil.NULL; import static org.apache.arrow.util.Preconditions.checkNotNull; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import org.apache.arrow.ffi.jni.JniWrapper; +import org.apache.arrow.c.jni.JniWrapper; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.ReferenceManager; @@ -72,8 +72,8 @@ public static class Snapshot { public long private_data; /** - * Initialize empty ArrowSchema snapshot. - */ + * Initialize empty ArrowSchema snapshot. + */ public Snapshot() { format = NULL; name = NULL; @@ -142,6 +142,7 @@ private ByteBuffer directBuffer() { /** * Take a snapshot of the ArrowSchema raw values. + * * @return snapshot */ public Snapshot snapshot() { @@ -164,7 +165,7 @@ public Snapshot snapshot() { */ public void save(Snapshot snapshot) { directBuffer().putLong(snapshot.format).putLong(snapshot.name).putLong(snapshot.metadata).putLong(snapshot.flags) - .putLong(snapshot.n_children).putLong(snapshot.children).putLong(snapshot.dictionary) - .putLong(snapshot.release).putLong(snapshot.private_data); + .putLong(snapshot.n_children).putLong(snapshot.children).putLong(snapshot.dictionary).putLong(snapshot.release) + .putLong(snapshot.private_data); } } diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/BaseStruct.java b/java/c/src/main/java/org/apache/arrow/c/BaseStruct.java similarity index 97% rename from java/ffi/src/main/java/org/apache/arrow/ffi/BaseStruct.java rename to java/c/src/main/java/org/apache/arrow/c/BaseStruct.java index c23e12f978c..d90fe817589 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/BaseStruct.java +++ b/java/c/src/main/java/org/apache/arrow/c/BaseStruct.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; /** * Base interface for C Data Interface structures. diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIDictionaryProvider.java b/java/c/src/main/java/org/apache/arrow/c/CDataDictionaryProvider.java similarity index 82% rename from java/ffi/src/main/java/org/apache/arrow/ffi/FFIDictionaryProvider.java rename to java/c/src/main/java/org/apache/arrow/c/CDataDictionaryProvider.java index c65fc90b3cf..43bcda276ef 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIDictionaryProvider.java +++ b/java/c/src/main/java/org/apache/arrow/c/CDataDictionaryProvider.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; import java.util.HashMap; import java.util.Map; @@ -25,23 +25,23 @@ import org.apache.arrow.vector.dictionary.DictionaryProvider; /** - * A DictionaryProvider that is used in FFI for imports. + * A DictionaryProvider that is used in C Data Interface for imports. *
<p>
- * FFIDictionaryProvider is similar to + * CDataDictionaryProvider is similar to * {@link DictionaryProvider.MapDictionaryProvider} with a key difference that * the dictionaries are owned by the provider so it must eventually be closed. *
<p>
- * The typical usage is to create the FFIDictionaryProvider and pass it to - * {@link FFI#importField} or {@link FFI#importSchema} to allocate empty + * The typical usage is to create the CDataDictionaryProvider and pass it to + * {@link Data#importField} or {@link Data#importSchema} to allocate empty * dictionaries based on the information in {@link ArrowSchema}. Then you can * re-use the same dictionary provider in any function that imports an * {@link ArrowArray} that has the same schema. */ -public class FFIDictionaryProvider implements DictionaryProvider, AutoCloseable { +public class CDataDictionaryProvider implements DictionaryProvider, AutoCloseable { private final Map map; - public FFIDictionaryProvider() { + public CDataDictionaryProvider() { this.map = new HashMap<>(); } diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java b/java/c/src/main/java/org/apache/arrow/c/CDataReferenceManager.java similarity index 91% rename from java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java rename to java/c/src/main/java/org/apache/arrow/c/CDataReferenceManager.java index 95b4e427703..c5c2f977900 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/FFIReferenceManager.java +++ b/java/c/src/main/java/org/apache/arrow/c/CDataReferenceManager.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; import java.util.concurrent.atomic.AtomicInteger; @@ -26,17 +26,18 @@ import org.apache.arrow.util.Preconditions; /** - * A ReferenceManager implementation that holds a {@link org.apache.arrow.ffi.BaseStruct}. + * A ReferenceManager implementation that holds a + * {@link org.apache.arrow.c.BaseStruct}. *
<p>
- * A reference count is maintained and once it reaches zero the struct - * is released (as per the C data interface specification) and closed. + * A reference count is maintained and once it reaches zero the struct is + * released (as per the C data interface specification) and closed. */ -final class FFIReferenceManager implements ReferenceManager { +final class CDataReferenceManager implements ReferenceManager { private final AtomicInteger bufRefCnt = new AtomicInteger(0); private final BaseStruct struct; - FFIReferenceManager(BaseStruct struct) { + CDataReferenceManager(BaseStruct struct) { this.struct = struct; } @@ -56,7 +57,7 @@ public boolean release() { void increment() { bufRefCnt.incrementAndGet(); } - + @Override public boolean release(int decrement) { Preconditions.checkState(decrement >= 1, "ref count decrement should be greater than or equal to 1"); diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java b/java/c/src/main/java/org/apache/arrow/c/Data.java similarity index 97% rename from java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java rename to java/c/src/main/java/org/apache/arrow/c/Data.java index 1a16acd802d..872effdac9c 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/FFI.java +++ b/java/c/src/main/java/org/apache/arrow/c/Data.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.FieldVector; @@ -39,9 +39,9 @@ * This API is EXPERIMENTAL. Note that currently only 64bit systems are * supported. */ -public final class FFI { +public final class Data { - private FFI() { + private Data() { } /** @@ -174,7 +174,7 @@ public static void exportVectorSchemaRoot(BufferAllocator allocator, VectorSchem * dictionary vectors (optional) * @return Imported field object */ - public static Field importField(BufferAllocator allocator, ArrowSchema schema, FFIDictionaryProvider provider) { + public static Field importField(BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) { try { SchemaImporter importer = new SchemaImporter(allocator); return importer.importField(schema, provider); @@ -196,7 +196,7 @@ public static Field importField(BufferAllocator allocator, ArrowSchema schema, F * dictionary vectors (optional) * @return Imported schema object */ - public static Schema importSchema(BufferAllocator allocator, ArrowSchema schema, FFIDictionaryProvider provider) { + public static Schema importSchema(BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) { Field structField = importField(allocator, schema, provider); if (structField.getType().getTypeID() != ArrowTypeID.Struct) { throw new IllegalArgumentException("Cannot import schema: ArrowSchema describes non-struct type"); @@ -235,7 +235,7 @@ public static void importIntoVector(BufferAllocator allocator, ArrowArray array, * @return Imported vector object */ public static FieldVector importVector(BufferAllocator allocator, ArrowArray array, ArrowSchema schema, - FFIDictionaryProvider provider) { + CDataDictionaryProvider provider) { Field field = importField(allocator, schema, provider); FieldVector vector = field.createVector(allocator); importIntoVector(allocator, array, vector, provider); @@ -283,7 +283,7 @@ public static void importIntoVectorSchemaRoot(BufferAllocator allocator, ArrowAr * @return Imported vector schema root */ public static VectorSchemaRoot importVectorSchemaRoot(BufferAllocator allocator, ArrowSchema 
schema, - FFIDictionaryProvider provider) { + CDataDictionaryProvider provider) { return importVectorSchemaRoot(allocator, schema, null, provider); } @@ -307,7 +307,7 @@ public static VectorSchemaRoot importVectorSchemaRoot(BufferAllocator allocator, * @return Imported vector schema root */ public static VectorSchemaRoot importVectorSchemaRoot(BufferAllocator allocator, ArrowSchema schema, ArrowArray array, - FFIDictionaryProvider provider) { + CDataDictionaryProvider provider) { VectorSchemaRoot vsr = VectorSchemaRoot.create(importSchema(allocator, schema, provider), allocator); if (array != null) { importIntoVectorSchemaRoot(allocator, array, vsr, provider); diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/Flags.java b/java/c/src/main/java/org/apache/arrow/c/Flags.java similarity index 98% rename from java/ffi/src/main/java/org/apache/arrow/ffi/Flags.java rename to java/c/src/main/java/org/apache/arrow/c/Flags.java index c41ce771bcc..744b4695ac3 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/Flags.java +++ b/java/c/src/main/java/org/apache/arrow/c/Flags.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/Format.java b/java/c/src/main/java/org/apache/arrow/c/Format.java similarity index 99% rename from java/ffi/src/main/java/org/apache/arrow/ffi/Format.java rename to java/c/src/main/java/org/apache/arrow/c/Format.java index 087608b5f54..315d3caad7d 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/Format.java +++ b/java/c/src/main/java/org/apache/arrow/c/Format.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; import java.util.Arrays; import java.util.stream.Collectors; diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/Metadata.java b/java/c/src/main/java/org/apache/arrow/c/Metadata.java similarity index 97% rename from java/ffi/src/main/java/org/apache/arrow/ffi/Metadata.java rename to java/c/src/main/java/org/apache/arrow/c/Metadata.java index e928336eb2f..b81b24fe4ce 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/Metadata.java +++ b/java/c/src/main/java/org/apache/arrow/c/Metadata.java @@ -15,9 +15,9 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; -import static org.apache.arrow.ffi.NativeUtil.NULL; +import static org.apache.arrow.c.NativeUtil.NULL; import static org.apache.arrow.util.Preconditions.checkState; import java.nio.ByteBuffer; diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/NativeUtil.java b/java/c/src/main/java/org/apache/arrow/c/NativeUtil.java similarity index 99% rename from java/ffi/src/main/java/org/apache/arrow/ffi/NativeUtil.java rename to java/c/src/main/java/org/apache/arrow/c/NativeUtil.java index daa66829f67..e2feda1e5dc 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/NativeUtil.java +++ b/java/c/src/main/java/org/apache/arrow/c/NativeUtil.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.arrow.ffi; +package org.apache.arrow.c; import java.nio.ByteBuffer; import java.nio.ByteOrder; diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaExporter.java b/java/c/src/main/java/org/apache/arrow/c/SchemaExporter.java similarity index 95% rename from java/ffi/src/main/java/org/apache/arrow/ffi/SchemaExporter.java rename to java/c/src/main/java/org/apache/arrow/c/SchemaExporter.java index 8b11a121fbb..04d41a4e4f9 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaExporter.java +++ b/java/c/src/main/java/org/apache/arrow/c/SchemaExporter.java @@ -15,17 +15,17 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; -import static org.apache.arrow.ffi.NativeUtil.NULL; -import static org.apache.arrow.ffi.NativeUtil.addressOrNull; +import static org.apache.arrow.c.NativeUtil.NULL; +import static org.apache.arrow.c.NativeUtil.addressOrNull; import static org.apache.arrow.util.Preconditions.checkNotNull; import java.util.ArrayList; import java.util.List; -import org.apache.arrow.ffi.jni.JniWrapper; -import org.apache.arrow.ffi.jni.PrivateData; +import org.apache.arrow.c.jni.JniWrapper; +import org.apache.arrow.c.jni.PrivateData; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.dictionary.Dictionary; diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java b/java/c/src/main/java/org/apache/arrow/c/SchemaImporter.java similarity index 94% rename from java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java rename to java/c/src/main/java/org/apache/arrow/c/SchemaImporter.java index dc1326aab24..21d88f6cd4b 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/SchemaImporter.java +++ b/java/c/src/main/java/org/apache/arrow/c/SchemaImporter.java @@ -15,9 +15,9 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; -import static org.apache.arrow.ffi.NativeUtil.NULL; +import static org.apache.arrow.c.NativeUtil.NULL; import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; import static org.apache.arrow.util.Preconditions.checkNotNull; import static org.apache.arrow.util.Preconditions.checkState; @@ -52,12 +52,11 @@ public SchemaImporter(BufferAllocator allocator) { this.allocator = allocator; } - Field importField(ArrowSchema schema, FFIDictionaryProvider provider) { + Field importField(ArrowSchema schema, CDataDictionaryProvider provider) { return importField(schema, provider, 0); } - private Field importField(ArrowSchema schema, FFIDictionaryProvider provider, - int recursionLevel) { + private Field importField(ArrowSchema schema, CDataDictionaryProvider provider, int recursionLevel) { checkState(recursionLevel <= MAX_IMPORT_RECURSION_LEVEL, "Recursion level in ArrowSchema struct exceeded"); ArrowSchema.Snapshot snapshot = schema.snapshot(); diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniLoader.java b/java/c/src/main/java/org/apache/arrow/c/jni/JniLoader.java similarity index 97% rename from java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniLoader.java rename to java/c/src/main/java/org/apache/arrow/c/jni/JniLoader.java index 893b736cc5a..bd2008f054a 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniLoader.java +++ b/java/c/src/main/java/org/apache/arrow/c/jni/JniLoader.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.arrow.ffi.jni; +package org.apache.arrow.c.jni; import java.io.File; import java.io.FileNotFoundException; @@ -33,7 +33,7 @@ * The JniLoader for C Data Interface API's native implementation. */ public class JniLoader { - private static final JniLoader INSTANCE = new JniLoader(Collections.singletonList("arrow_ffi_jni")); + private static final JniLoader INSTANCE = new JniLoader(Collections.singletonList("arrow_cdata_jni")); public static JniLoader get() { return INSTANCE; diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniWrapper.java b/java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java similarity index 97% rename from java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniWrapper.java rename to java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java index a32cc3b6a3f..8d3e4d4f5a9 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/jni/JniWrapper.java +++ b/java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.arrow.ffi.jni; +package org.apache.arrow.c.jni; /** * JniWrapper for C Data Interface API implementation. diff --git a/java/ffi/src/main/java/org/apache/arrow/ffi/jni/PrivateData.java b/java/c/src/main/java/org/apache/arrow/c/jni/PrivateData.java similarity index 96% rename from java/ffi/src/main/java/org/apache/arrow/ffi/jni/PrivateData.java rename to java/c/src/main/java/org/apache/arrow/c/jni/PrivateData.java index 7a9a68dba94..e6336cc6477 100644 --- a/java/ffi/src/main/java/org/apache/arrow/ffi/jni/PrivateData.java +++ b/java/c/src/main/java/org/apache/arrow/c/jni/PrivateData.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.arrow.ffi.jni; +package org.apache.arrow.c.jni; import java.io.Closeable; diff --git a/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorLoader.java b/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java similarity index 99% rename from java/ffi/src/main/java/org/apache/arrow/vector/StructVectorLoader.java rename to java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java index 114ee54bcd5..eab7e491f31 100644 --- a/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorLoader.java +++ b/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java @@ -71,7 +71,7 @@ public StructVectorLoader(Schema schema, CompressionCodec.Factory factory) { } /** - * Loads the record batch into the struct vector. + * Loads the record batch into the struct vector. * *
<p>
* This will not close the record batch. diff --git a/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java b/java/c/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java similarity index 70% rename from java/ffi/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java rename to java/c/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java index 8196359e86c..e75156cf237 100644 --- a/java/ffi/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java +++ b/java/c/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java @@ -29,8 +29,8 @@ import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; /** - * Helper class that handles converting a {@link StructVector} - * to a {@link ArrowRecordBatch}. + * Helper class that handles converting a {@link StructVector} to a + * {@link ArrowRecordBatch}. */ public class StructVectorUnloader { @@ -49,25 +49,29 @@ public StructVectorUnloader(StructVector root) { /** * Constructs a new instance. * - * @param root The struct vector to serialize to an {@link ArrowRecordBatch}. - * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch} - * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries. + * @param root The struct vector to serialize to an + * {@link ArrowRecordBatch}. + * @param includeNullCount Controls whether null count is copied to the + * {@link ArrowRecordBatch} + * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries. */ - public StructVectorUnloader( - StructVector root, boolean includeNullCount, boolean alignBuffers) { + public StructVectorUnloader(StructVector root, boolean includeNullCount, boolean alignBuffers) { this(root, includeNullCount, NoCompressionCodec.INSTANCE, alignBuffers); } /** * Constructs a new instance. * - * @param root The struct vector to serialize to an {@link ArrowRecordBatch}. - * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch} - * @param codec the codec for compressing data. If it is null, then no compression is needed. - * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries. + * @param root The struct vector to serialize to an + * {@link ArrowRecordBatch}. + * @param includeNullCount Controls whether null count is copied to the + * {@link ArrowRecordBatch} + * @param codec the codec for compressing data. If it is null, then + * no compression is needed. + * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries. */ - public StructVectorUnloader( - StructVector root, boolean includeNullCount, CompressionCodec codec, boolean alignBuffers) { + public StructVectorUnloader(StructVector root, boolean includeNullCount, CompressionCodec codec, + boolean alignBuffers) { this.root = root; this.includeNullCount = includeNullCount; this.codec = codec; @@ -75,8 +79,8 @@ public StructVectorUnloader( } /** - * Performs the depth first traversal of the Vectors to create an {@link ArrowRecordBatch} suitable - * for serialization. + * Performs the depth first traversal of the Vectors to create an + * {@link ArrowRecordBatch} suitable for serialization. 
*/ public ArrowRecordBatch getRecordBatch() { List nodes = new ArrayList<>(); @@ -84,8 +88,8 @@ public ArrowRecordBatch getRecordBatch() { for (FieldVector vector : root.getChildrenFromFields()) { appendNodes(vector, nodes, buffers); } - return new ArrowRecordBatch( - root.getValueCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), alignBuffers); + return new ArrowRecordBatch(root.getValueCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), + alignBuffers); } private void appendNodes(FieldVector vector, List nodes, List buffers) { @@ -93,8 +97,7 @@ private void appendNodes(FieldVector vector, List nodes, List fieldBuffers = vector.getFieldBuffers(); int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType()); if (fieldBuffers.size() != expectedBufferCount) { - throw new IllegalArgumentException(String.format( - "wrong number of buffers for field %s in vector %s. found: %s", + throw new IllegalArgumentException(String.format("wrong number of buffers for field %s in vector %s. found: %s", vector.getField(), vector.getClass().getSimpleName(), fieldBuffers)); } for (ArrowBuf buf : fieldBuffers) { diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/DictionaryTest.java b/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java similarity index 85% rename from java/ffi/src/test/java/org/apache/arrow/ffi/DictionaryTest.java rename to java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java index 8a3e7dd05b7..3f793f836d6 100644 --- a/java/ffi/src/test/java/org/apache/arrow/ffi/DictionaryTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -25,6 +25,10 @@ import java.nio.channels.Channels; import java.util.Collections; +import org.apache.arrow.c.ArrowArray; +import org.apache.arrow.c.ArrowSchema; +import org.apache.arrow.c.CDataDictionaryProvider; +import org.apache.arrow.c.Data; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.vector.FieldVector; @@ -65,19 +69,19 @@ void roundtrip(FieldVector vector, DictionaryProvider provider, Class clazz) // Producer creates structures from existing memory pointers try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { - // Producer exports vector into the FFI structures - FFI.exportVector(allocator, vector, provider, arrowArray, arrowSchema); + // Producer exports vector into the C Data Interface structures + Data.exportVector(allocator, vector, provider, arrowArray, arrowSchema); } // Consumer imports vector - try (FFIDictionaryProvider ffiDictionaryProvider = new FFIDictionaryProvider(); - FieldVector imported = FFI.importVector(allocator, consumerArrowArray, consumerArrowSchema, - ffiDictionaryProvider);) { + try (CDataDictionaryProvider cDictionaryProvider = new CDataDictionaryProvider(); + FieldVector imported = Data.importVector(allocator, consumerArrowArray, consumerArrowSchema, + cDictionaryProvider);) { assertTrue(clazz.isInstance(imported), String.format("expected %s but was %s", clazz, imported.getClass())); assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported), "vectors are not equivalent"); - for (long id : ffiDictionaryProvider.getDictionaryIds()) { + for (long id : 
cDictionaryProvider.getDictionaryIds()) { ValueVector exportedDictionaryVector = provider.lookup(id).getVector(); - ValueVector importedDictionaryVector = ffiDictionaryProvider.lookup(id).getVector(); + ValueVector importedDictionaryVector = cDictionaryProvider.lookup(id).getVector(); assertTrue(VectorEqualsVisitor.vectorEquals(exportedDictionaryVector, importedDictionaryVector), String.format("Dictionary vectors for ID %d are not equivalent", id)); } @@ -111,7 +115,7 @@ public void testWithDictionary() throws Exception { // get the encoded vector IntVector encodedVector = (IntVector) DictionaryEncoder.encode(vector, dictionary); - // FFI roundtrip + // Perform roundtrip using C Data Interface roundtrip(encodedVector, provider, IntVector.class); // Close all @@ -125,17 +129,17 @@ public void testRoundtripMultipleBatches() throws IOException { // Load first batch reader.loadNextBatch(); // Producer fills consumer schema stucture - FFI.exportSchema(allocator, reader.getVectorSchemaRoot().getSchema(), reader, consumerArrowSchema); + Data.exportSchema(allocator, reader.getVectorSchemaRoot().getSchema(), reader, consumerArrowSchema); // Consumer loads it as an empty vector schema root - try (FFIDictionaryProvider consumerDictionaryProvider = new FFIDictionaryProvider(); - VectorSchemaRoot consumerRoot = FFI.importVectorSchemaRoot(allocator, consumerArrowSchema, + try (CDataDictionaryProvider consumerDictionaryProvider = new CDataDictionaryProvider(); + VectorSchemaRoot consumerRoot = Data.importVectorSchemaRoot(allocator, consumerArrowSchema, consumerDictionaryProvider)) { do { try (ArrowArray consumerArray = ArrowArray.allocateNew(allocator)) { // Producer exports next data - FFI.exportVectorSchemaRoot(allocator, reader.getVectorSchemaRoot(), reader, consumerArray); + Data.exportVectorSchemaRoot(allocator, reader.getVectorSchemaRoot(), reader, consumerArray); // Consumer loads next data - FFI.importIntoVectorSchemaRoot(allocator, consumerArray, consumerRoot, consumerDictionaryProvider); + Data.importIntoVectorSchemaRoot(allocator, consumerArray, consumerRoot, consumerDictionaryProvider); // Roundtrip validation assertTrue(consumerRoot.equals(reader.getVectorSchemaRoot()), "vector schema roots are not equivalent"); diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/FlagsTest.java b/java/c/src/test/java/org/apache/arrow/c/FlagsTest.java similarity index 98% rename from java/ffi/src/test/java/org/apache/arrow/ffi/FlagsTest.java rename to java/c/src/test/java/org/apache/arrow/c/FlagsTest.java index 8dad4ee7032..35f836f71f5 100644 --- a/java/ffi/src/test/java/org/apache/arrow/ffi/FlagsTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/FlagsTest.java @@ -15,12 +15,13 @@ * limitations under the License. 
*/ -package org.apache.arrow.ffi; +package org.apache.arrow.c; import static org.junit.jupiter.api.Assertions.assertEquals; import java.util.ArrayList; +import org.apache.arrow.c.Flags; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.Field; diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/FormatTest.java b/java/c/src/test/java/org/apache/arrow/c/FormatTest.java similarity index 91% rename from java/ffi/src/test/java/org/apache/arrow/ffi/FormatTest.java rename to java/c/src/test/java/org/apache/arrow/c/FormatTest.java index 40b5eff45ea..1f7f86b36f6 100644 --- a/java/ffi/src/test/java/org/apache/arrow/ffi/FormatTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/FormatTest.java @@ -15,12 +15,14 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import org.apache.arrow.c.Flags; +import org.apache.arrow.c.Format; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.IntervalUnit; @@ -73,18 +75,14 @@ public void testAsString() { assertEquals("tsm:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "Timezone"))); assertEquals("tsu:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "Timezone"))); assertEquals("tsn:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.NANOSECOND, "Timezone"))); - assertEquals("+us:1,1,1", - Format.asString(new ArrowType.Union(UnionMode.Sparse, new int[]{1, 1, 1}))); - assertEquals("+ud:1,1,1", - Format.asString(new ArrowType.Union(UnionMode.Dense, new int[]{1, 1, 1}))); + assertEquals("+us:1,1,1", Format.asString(new ArrowType.Union(UnionMode.Sparse, new int[] { 1, 1, 1 }))); + assertEquals("+ud:1,1,1", Format.asString(new ArrowType.Union(UnionMode.Dense, new int[] { 1, 1, 1 }))); assertEquals("u", Format.asString(new ArrowType.Utf8())); - assertThrows(UnsupportedOperationException.class, () -> - Format.asString(new ArrowType.Int(1, true))); - assertThrows(UnsupportedOperationException.class, () -> - Format.asString(new ArrowType.Time(TimeUnit.SECOND, 1))); - assertThrows(UnsupportedOperationException.class, () -> - Format.asString(new ArrowType.Time(TimeUnit.MILLISECOND, 64))); + assertThrows(UnsupportedOperationException.class, () -> Format.asString(new ArrowType.Int(1, true))); + assertThrows(UnsupportedOperationException.class, () -> Format.asString(new ArrowType.Time(TimeUnit.SECOND, 1))); + assertThrows(UnsupportedOperationException.class, + () -> Format.asString(new ArrowType.Time(TimeUnit.MILLISECOND, 64))); } @Test @@ -128,8 +126,8 @@ public void testAsType() throws IllegalStateException, NumberFormatException, Un assertEquals(new ArrowType.Decimal(9, 1, 1), Format.asType("d:9,1,1", 0L)); assertEquals(new ArrowType.FixedSizeBinary(1), Format.asType("w:1", 0L)); assertEquals(new ArrowType.FixedSizeList(3), Format.asType("+w:3", 0L)); - assertEquals(new ArrowType.Union(UnionMode.Dense, new int[]{1, 1, 1}), Format.asType("+ud:1,1,1", 0L)); - assertEquals(new ArrowType.Union(UnionMode.Sparse, new int[]{1, 1, 1}), Format.asType("+us:1,1,1", 0L)); + assertEquals(new ArrowType.Union(UnionMode.Dense, new int[] { 1, 1, 1 }), Format.asType("+ud:1,1,1", 0L)); + assertEquals(new 
ArrowType.Union(UnionMode.Sparse, new int[] { 1, 1, 1 }), Format.asType("+us:1,1,1", 0L)); assertEquals(new ArrowType.Timestamp(TimeUnit.SECOND, "Timezone"), Format.asType("tss:Timezone", 0L)); assertEquals(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "Timezone"), Format.asType("tsm:Timezone", 0L)); assertEquals(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "Timezone"), Format.asType("tsu:Timezone", 0L)); @@ -140,4 +138,3 @@ public void testAsType() throws IllegalStateException, NumberFormatException, Un assertThrows(NumberFormatException.class, () -> Format.asType("w:1,2,3", 0L)); } } - diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/MetadataTest.java b/java/c/src/test/java/org/apache/arrow/c/MetadataTest.java similarity index 96% rename from java/ffi/src/test/java/org/apache/arrow/ffi/MetadataTest.java rename to java/c/src/test/java/org/apache/arrow/c/MetadataTest.java index 019f2d63a63..1d9703b1a87 100644 --- a/java/ffi/src/test/java/org/apache/arrow/ffi/MetadataTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/MetadataTest.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; import static org.junit.jupiter.api.Assertions.*; @@ -24,6 +24,8 @@ import java.util.HashMap; import java.util.Map; +import org.apache.arrow.c.Metadata; +import org.apache.arrow.c.NativeUtil; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.memory.util.LargeMemoryUtil; diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/NativeUtilTest.java b/java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java similarity index 93% rename from java/ffi/src/test/java/org/apache/arrow/ffi/NativeUtilTest.java rename to java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java index 931e4fce269..f46a0128c86 100644 --- a/java/ffi/src/test/java/org/apache/arrow/ffi/NativeUtilTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.arrow.ffi; +package org.apache.arrow.c; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -23,6 +23,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; +import org.apache.arrow.c.NativeUtil; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.memory.util.LargeMemoryUtil; @@ -62,13 +63,13 @@ public void testString() { @Test public void testToJavaArray() { - long[] nativeArray = new long[] {1, 2, 3}; + long[] nativeArray = new long[] { 1, 2, 3 }; try (ArrowBuf buffer = allocator.buffer(Long.BYTES * nativeArray.length, null)) { for (long value : nativeArray) { buffer.writeLong(value); - } + } long[] actual = NativeUtil.toJavaArray(buffer.memoryAddress(), nativeArray.length); - assertArrayEquals(nativeArray, actual); + assertArrayEquals(nativeArray, actual); } } @@ -77,5 +78,5 @@ public void testToZeroJavaArray() { long[] actual = NativeUtil.toJavaArray(0xDEADBEEF, 0); assertEquals(0, actual.length); } - + } diff --git a/java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java similarity index 95% rename from java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java rename to java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java index 03a52e047f0..dc2c0b1e1f5 100644 --- a/java/ffi/src/test/java/org/apache/arrow/ffi/RoundtripTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.arrow.ffi; +package org.apache.arrow.c; import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -33,6 +33,9 @@ import java.util.UUID; import java.util.stream.Collectors; +import org.apache.arrow.c.ArrowArray; +import org.apache.arrow.c.ArrowSchema; +import org.apache.arrow.c.Data; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -122,12 +125,12 @@ FieldVector vectorRoundtrip(FieldVector vector) { // Producer creates structures from existing memory pointers try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { - // Producer exports vector into the FFI structures - FFI.exportVector(allocator, vector, null, arrowArray, arrowSchema); + // Producer exports vector into the C Data Interface structures + Data.exportVector(allocator, vector, null, arrowArray, arrowSchema); } // Consumer imports vector - return FFI.importVector(allocator, consumerArrowArray, consumerArrowSchema, null); + return Data.importVector(allocator, consumerArrowArray, consumerArrowSchema, null); } } @@ -139,12 +142,12 @@ VectorSchemaRoot vectorSchemaRootRoundtrip(VectorSchemaRoot root) { // Producer creates structures from existing memory pointers try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { - // Producer exports vector into the FFI structures - FFI.exportVectorSchemaRoot(allocator, root, null, arrowArray, arrowSchema); + // Producer exports vector into the C Data Interface structures + Data.exportVectorSchemaRoot(allocator, root, null, arrowArray, arrowSchema); } // Consumer imports vector - return 
FFI.importVectorSchemaRoot(allocator, consumerArrowSchema, consumerArrowArray, null); + return Data.importVectorSchemaRoot(allocator, consumerArrowSchema, consumerArrowArray, null); } } @@ -612,10 +615,8 @@ public void testExtensionTypeVector() { final Field field = importedRoot.getSchema().getFields().get(0); final UuidType expectedType = new UuidType(); - assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), - expectedType.extensionName()); - assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), - expectedType.serialize()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), expectedType.serialize()); final UuidVector deserialized = (UuidVector) importedRoot.getFieldVectors().get(0); assertEquals(vector.getValueCount(), deserialized.getValueCount()); @@ -641,16 +642,16 @@ public void testVectorSchemaRoot() { // Producer creates structures from existing memory pointers try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { - // Producer exports vector into the FFI structures - FFI.exportVectorSchemaRoot(allocator, vsr, null, arrowArray, arrowSchema); + // Producer exports vector into the C Data Interface structures + Data.exportVectorSchemaRoot(allocator, vsr, null, arrowArray, arrowSchema); } } // Consumer imports vector - imported = FFI.importVectorSchemaRoot(allocator, consumerArrowSchema, consumerArrowArray, null); + imported = Data.importVectorSchemaRoot(allocator, consumerArrowSchema, consumerArrowArray, null); } - // Ensure that imported VectorSchemaRoot is valid even after FFI structures - // closed + // Ensure that imported VectorSchemaRoot is valid even after C Data Interface + // structures are closed try (VectorSchemaRoot original = createTestVSR()) { assertTrue(imported.equals(original)); } @@ -669,10 +670,10 @@ public void testSchema() { try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator)) { // Producer fills the schema with data try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress())) { - FFI.exportSchema(allocator, schema, null, arrowSchema); + Data.exportSchema(allocator, schema, null, arrowSchema); } // Consumer imports schema - Schema importedSchema = FFI.importSchema(allocator, consumerArrowSchema, null); + Schema importedSchema = Data.importSchema(allocator, consumerArrowSchema, null); assertEquals(schema.toJson(), importedSchema.toJson()); } } @@ -685,9 +686,9 @@ public void testImportReleasedArray() { // Producer creates structures from existing memory pointers try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { - // Producer exports vector into the FFI structures + // Producer exports vector into the C Data Interface structures try (final NullVector vector = new NullVector()) { - FFI.exportVector(allocator, vector, null, arrowArray, arrowSchema); + Data.exportVector(allocator, vector, null, arrowArray, arrowSchema); } } @@ -696,7 +697,7 @@ public void testImportReleasedArray() { // Consumer tried to imports vector but fails Exception e = assertThrows(IllegalStateException.class, () -> { - FFI.importVector(allocator, consumerArrowArray, consumerArrowSchema, null); + Data.importVector(allocator, 
consumerArrowArray, consumerArrowSchema, null); }); assertEquals("Cannot import released ArrowArray", e.getMessage()); diff --git a/java/pom.xml b/java/pom.xml index ddc3822f549..5dd2dfc9402 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -697,10 +697,10 @@ - - arrow-ffi + + arrow-c-data - ffi + c From 334564156912f9f630d4a6c0dffd729b5c278909 Mon Sep 17 00:00:00 2001 From: roee88 Date: Tue, 12 Oct 2021 12:05:35 +0300 Subject: [PATCH 18/21] Removed redundant lines in CMakeLists.txt Signed-off-by: roee88 --- java/c/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/java/c/CMakeLists.txt b/java/c/CMakeLists.txt index 1c6b048be12..1025f87afbc 100644 --- a/java/c/CMakeLists.txt +++ b/java/c/CMakeLists.txt @@ -24,9 +24,7 @@ message(STATUS "Building using CMake version: ${CMAKE_VERSION}") project(arrow_cdata_java) # Find java/jni -include(FindJava) include(UseJava) -include(FindJNI) find_package(Java REQUIRED) find_package(JNI REQUIRED) From 9412dab135bc33ecf790d36fcece231c4400e8b1 Mon Sep 17 00:00:00 2001 From: roee88 Date: Tue, 12 Oct 2021 12:21:42 +0300 Subject: [PATCH 19/21] Add best effort to error on 32-bit systems Signed-off-by: roee88 --- java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java b/java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java index 8d3e4d4f5a9..04a143a7a99 100644 --- a/java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java +++ b/java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java @@ -28,6 +28,12 @@ public static JniWrapper get() { } private JniWrapper() { + // A best effort to error on 32-bit systems + String dataModel = System.getProperty("sun.arch.data.model"); + if (dataModel != null && dataModel.equals("32")) { + throw new UnsupportedOperationException( + "The Java C Data Interface implementation is currently only supported on 64-bit systems"); + } JniLoader.get().ensureLoaded(); } From 4fbb16fb97974f39a65139ca264b2300f6b45c22 Mon Sep 17 00:00:00 2001 From: roee88 Date: Tue, 12 Oct 2021 12:37:40 +0300 Subject: [PATCH 20/21] code style: missing end of line Signed-off-by: roee88 --- java/c/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/c/pom.xml b/java/c/pom.xml index c6edbfb296f..55d07302ee6 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -74,4 +74,4 @@ - \ No newline at end of file + From bff64109e4f2cf48a01808c6fbd4cd3750276ff1 Mon Sep 17 00:00:00 2001 From: Doron Chen Date: Wed, 13 Oct 2021 09:02:51 +0300 Subject: [PATCH 21/21] swap order of ArrowArray and ArrowSchema parameters (#16) Signed-off-by: Doron Chen Co-authored-by: CDORON@il.ibm.com --- java/c/src/main/java/org/apache/arrow/c/Data.java | 12 ++++++------ .../test/java/org/apache/arrow/c/RoundtripTest.java | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/java/c/src/main/java/org/apache/arrow/c/Data.java b/java/c/src/main/java/org/apache/arrow/c/Data.java index 872effdac9c..27b0ce4bfcc 100644 --- a/java/c/src/main/java/org/apache/arrow/c/Data.java +++ b/java/c/src/main/java/org/apache/arrow/c/Data.java @@ -101,7 +101,7 @@ public static void exportVector(BufferAllocator allocator, FieldVector vector, D * @param provider Dictionary provider for dictionary encoded vectors * (optional) * @param out C struct where to export the array - * @param outSchema Optional C struct where to export the array type + * @param outSchema C struct where to export the array type (optional) */ public static void 
exportVector(BufferAllocator allocator, FieldVector vector, DictionaryProvider provider, ArrowArray out, ArrowSchema outSchema) { @@ -145,7 +145,7 @@ public static void exportVectorSchemaRoot(BufferAllocator allocator, VectorSchem * @param provider Dictionary provider for dictionary encoded vectors * (optional) * @param out C struct where to export the record batch - * @param outSchema Optional C struct where to export the record batch schema + * @param outSchema C struct where to export the record batch schema (optional) */ public static void exportVectorSchemaRoot(BufferAllocator allocator, VectorSchemaRoot vsr, DictionaryProvider provider, ArrowArray out, ArrowSchema outSchema) { @@ -284,7 +284,7 @@ public static void importIntoVectorSchemaRoot(BufferAllocator allocator, ArrowAr */ public static VectorSchemaRoot importVectorSchemaRoot(BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) { - return importVectorSchemaRoot(allocator, schema, null, provider); + return importVectorSchemaRoot(allocator, null, schema, provider); } /** @@ -300,13 +300,13 @@ public static VectorSchemaRoot importVectorSchemaRoot(BufferAllocator allocator, * reusing the same vector schema root. * * @param allocator Buffer allocator for allocating the output VectorSchemaRoot + * @param array C data interface struct holding the record batch data + * (optional) * @param schema C data interface struct holding the record batch schema - * @param array Optional C data interface struct holding the record batch - * data * @param provider Dictionary provider to load dictionary vectors to (optional) * @return Imported vector schema root */ - public static VectorSchemaRoot importVectorSchemaRoot(BufferAllocator allocator, ArrowSchema schema, ArrowArray array, + public static VectorSchemaRoot importVectorSchemaRoot(BufferAllocator allocator, ArrowArray array, ArrowSchema schema, CDataDictionaryProvider provider) { VectorSchemaRoot vsr = VectorSchemaRoot.create(importSchema(allocator, schema, provider), allocator); if (array != null) { diff --git a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java index dc2c0b1e1f5..059ca328453 100644 --- a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java @@ -147,7 +147,7 @@ VectorSchemaRoot vectorSchemaRootRoundtrip(VectorSchemaRoot root) { } // Consumer imports vector - return Data.importVectorSchemaRoot(allocator, consumerArrowSchema, consumerArrowArray, null); + return Data.importVectorSchemaRoot(allocator, consumerArrowArray, consumerArrowSchema, null); } } @@ -647,7 +647,7 @@ public void testVectorSchemaRoot() { } } // Consumer imports vector - imported = Data.importVectorSchemaRoot(allocator, consumerArrowSchema, consumerArrowArray, null); + imported = Data.importVectorSchemaRoot(allocator, consumerArrowArray, consumerArrowSchema, null); } // Ensure that imported VectorSchemaRoot is valid even after C Data Interface
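With the series applied, the module lives under `java/c` and the public entry point is `org.apache.arrow.c.Data`. Below is a minimal, hypothetical round-trip sketch, not part of the patch series, that mirrors what `RoundtripTest` exercises and uses the argument order introduced in the final patch (the `ArrowArray` is passed before the `ArrowSchema`). The class name `CDataRoundtripSketch`, the vector name `"ints"`, the sample values, and the `contentToTSVString()` printout are illustrative assumptions, not code from the patches.

```
import org.apache.arrow.c.ArrowArray;
import org.apache.arrow.c.ArrowSchema;
import org.apache.arrow.c.CDataDictionaryProvider;
import org.apache.arrow.c.Data;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VectorSchemaRoot;

public class CDataRoundtripSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
         // Consumer allocates the C Data Interface structures that will receive the data
         ArrowSchema consumerSchema = ArrowSchema.allocateNew(allocator);
         ArrowArray consumerArray = ArrowArray.allocateNew(allocator)) {

      // Producer side: build a small batch and export it into the consumer's structures
      IntVector vector = new IntVector("ints", allocator);
      vector.allocateNew(3);
      vector.set(0, 1);
      vector.set(1, 2);
      vector.set(2, 3);
      vector.setValueCount(3);
      try (VectorSchemaRoot vsr = VectorSchemaRoot.of(vector)) {
        Data.exportVectorSchemaRoot(allocator, vsr, null, consumerArray, consumerSchema);
      }

      // Consumer side: import the batch; after the last patch the array is passed before the schema
      try (CDataDictionaryProvider provider = new CDataDictionaryProvider();
           VectorSchemaRoot imported =
               Data.importVectorSchemaRoot(allocator, consumerArray, consumerSchema, provider)) {
        System.out.println(imported.contentToTSVString());
      }
    }
  }
}
```

As in the tests above, the consumer owns the `ArrowSchema`/`ArrowArray` allocations and the producer only fills them, which is why the imported data remains valid after those structures are closed.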