apache · pitrou · Nov 13, 2019 · Feb 14, 2020 · jacques-n · Dec 16, 2019
diff --git a/c_glib/test/test-array.rb b/c_glib/test/test-array.rb
@@ -154,7 +154,7 @@ def test_diff
     def test_different_type
       array = build_string_array(["Start", "Shutdown", "Reboot"])
       other_array = build_int8_array([2, 3, 6, 10])
-      assert_equal("# Array types differed: string vs int8",
+      assert_equal("# Array types differed: string vs int8\n",
                    array.diff_unified(other_array))
     end
   end

diff --git a/ci/conda_env_python.yml b/ci/conda_env_python.yml
@@ -16,6 +16,7 @@
 # under the License.
 
 # don't add pandas here, because it is not a mandatory test dependency
+cffi
 cython
 cloudpickle
 hypothesis

diff --git a/ci/conda_env_r.yml b/ci/conda_env_r.yml
@@ -32,6 +32,7 @@ r-covr
 r-hms
 r-lubridate
 r-rcmdcheck
+r-reticulate
 r-rmarkdown
 r-testthat
 r-tibble
diff --git a/ci/docker/conda-r.dockerfile b/ci/docker/conda-r.dockerfile
@@ -48,5 +48,6 @@ ENV ARROW_BUILD_STATIC=OFF \
     ARROW_ORC=OFF \
     ARROW_PARQUET=ON \
     ARROW_PLASMA=OFF \
+    ARROW_USE_CCACHE=ON \
     ARROW_USE_GLOG=OFF \
     LC_ALL=en_US.UTF-8
diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile
@@ -47,7 +47,11 @@ RUN apt-get update -y && \
         # R CMD CHECK --as-cran needs pdflatex to build the package manual
         texlive-latex-base \
         # Need locales so we can set UTF-8
-        locales && \
+        locales \
+        # Need Python to check py-to-r bridge
+        python3 \
+        python3-pip \
+        python3-dev && \
     locale-gen en_US.UTF-8 && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
@@ -63,6 +67,18 @@ COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
 COPY r/DESCRIPTION /arrow/r/
 RUN /arrow/ci/scripts/r_deps.sh /arrow
 
+# Set up Python 3 and its dependencies
+RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
+    ln -s /usr/bin/pip3 /usr/local/bin/pip
+
+COPY python/requirements.txt \
+     python/requirements-test.txt \
+     /arrow/python/
+
+RUN pip install \
+    -r /arrow/python/requirements.txt \
+    cython setuptools
+
 ENV \
     ARROW_BUILD_STATIC=OFF \
     ARROW_BUILD_TESTS=OFF \
@@ -74,5 +90,7 @@ ENV \
     ARROW_ORC=OFF \
     ARROW_PARQUET=ON \
     ARROW_PLASMA=OFF \
+    ARROW_PYTHON=ON \
+    ARROW_USE_CCACHE=ON \
     ARROW_USE_GLOG=OFF \
     LC_ALL=en_US.UTF-8
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
@@ -139,6 +139,7 @@ set(ARROW_SRCS
     tensor.cc
     type.cc
     visitor.cc
+    c/bridge.cc
     io/buffered.cc
     io/compressed.cc
     io/file.cc
@@ -278,6 +279,7 @@ add_subdirectory(testing)
 #
 
 add_subdirectory(array)
+add_subdirectory(c)
 add_subdirectory(io)
 add_subdirectory(util)
 add_subdirectory(vendored)

diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
@@ -716,6 +716,12 @@ Result<std::shared_ptr<StructArray>> StructArray::Make(
   if (offset > length) {
     return Status::IndexError("Offset greater than length of child arrays");
   }
+  if (null_bitmap == nullptr) {
+    if (null_count > 0) {
+      return Status::Invalid("null_count = ", null_count, " but no null bitmap given");
+    }
+    null_count = 0;
+  }
   return std::make_shared<StructArray>(struct_(fields), length - offset, children,
                                        null_bitmap, null_count, offset);
 }

diff --git a/cpp/src/arrow/array/builder_primitive.h b/cpp/src/arrow/array/builder_primitive.h
@@ -83,15 +83,15 @@ class NumericBuilder : public ArrayBuilder {
   /// uninitialized memory access
   Status AppendNulls(int64_t length) final {
     ARROW_RETURN_NOT_OK(Reserve(length));
-    data_builder_.UnsafeAppend(length, static_cast<value_type>(0));
+    data_builder_.UnsafeAppend(length, value_type{});  // zero filler for the null slots
     UnsafeSetNull(length);
     return Status::OK();
   }
 
   /// \brief Append a single null element
   Status AppendNull() final {
     ARROW_RETURN_NOT_OK(Reserve(1));
-    data_builder_.UnsafeAppend(static_cast<value_type>(0));
+    data_builder_.UnsafeAppend(value_type{});  // zero filler for the null slot
     UnsafeAppendToBitmap(false);
     return Status::OK();
   }
@@ -243,7 +243,7 @@ class NumericBuilder : public ArrayBuilder {
 
   void UnsafeAppendNull() {
     ArrayBuilder::UnsafeAppendToBitmap(false);
-    data_builder_.UnsafeAppend(0);
+    data_builder_.UnsafeAppend(value_type{});  // zero filler for the null slot
   }
 
   std::shared_ptr<DataType> type() const override { return type_; }

diff --git a/cpp/src/arrow/array/builder_time.h b/cpp/src/arrow/array/builder_time.h
@@ -21,52 +21,23 @@
 
 #include <memory>
 
-#include "arrow/array.h"
 #include "arrow/array/builder_base.h"
-#include "arrow/array/builder_binary.h"
 #include "arrow/array/builder_primitive.h"
-#include "arrow/buffer_builder.h"
-#include "arrow/status.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/macros.h"
 
 namespace arrow {
 
-class ARROW_EXPORT DayTimeIntervalBuilder : public ArrayBuilder {
+// TODO: untested. Only adds constructors; everything else is NumericBuilder's.
+
+class ARROW_EXPORT DayTimeIntervalBuilder : public NumericBuilder<DayTimeIntervalType> {
  public:
-  using TypeClass = DayTimeIntervalType;
   using DayMilliseconds = DayTimeIntervalType::DayMilliseconds;
 
   explicit DayTimeIntervalBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
       : DayTimeIntervalBuilder(day_time_interval(), pool) {}
 
-  DayTimeIntervalBuilder(std::shared_ptr<DataType> type,
-                         MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
-      : ArrayBuilder(pool), builder_(fixed_size_binary(sizeof(DayMilliseconds)), pool) {}
-
-  void Reset() override { builder_.Reset(); }
-  Status Resize(int64_t capacity) override { return builder_.Resize(capacity); }
-  Status Append(DayMilliseconds day_millis) {
-    return builder_.Append(reinterpret_cast<uint8_t*>(&day_millis));
-  }
-  void UnsafeAppend(DayMilliseconds day_millis) {
-    builder_.UnsafeAppend(reinterpret_cast<uint8_t*>(&day_millis));
-  }
-  using ArrayBuilder::UnsafeAppendNull;
-  Status AppendNull() override { return builder_.AppendNull(); }
-  Status AppendNulls(int64_t length) override { return builder_.AppendNulls(length); }
-  Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
-    auto result = builder_.FinishInternal(out);
-    if (*out != NULLPTR) {
-      (*out)->type = type();
-    }
-    return result;
-  }
-
-  std::shared_ptr<DataType> type() const override { return day_time_interval(); }
-
- private:
-  FixedSizeBinaryBuilder builder_;
+  explicit DayTimeIntervalBuilder(std::shared_ptr<DataType> type,
+                                  MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
+      : NumericBuilder<DayTimeIntervalType>(type, pool) {}
 };
 
 }  // namespace arrow
diff --git a/cpp/src/arrow/array/diff_test.cc b/cpp/src/arrow/array/diff_test.cc
@@ -152,7 +152,7 @@ TEST_F(DiffTest, Errors) {
   ASSERT_RAISES(TypeError, Diff(*base_, *target_, default_memory_pool()));
 
   ASSERT_FALSE(base_->Equals(*target_, EqualOptions().diff_sink(&formatted)));
-  ASSERT_EQ(formatted.str(), R"(# Array types differed: int32 vs string)");
+  ASSERT_EQ(formatted.str(), "# Array types differed: int32 vs string\n");
 }
 
 template <typename ArrowType>

diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc
@@ -18,6 +18,7 @@
 #include "arrow/array/validate.h"
 
 #include "arrow/array.h"
+#include "arrow/util/bit_util.h"
 #include "arrow/util/int_util.h"
 #include "arrow/util/logging.h"
 #include "arrow/visitor_inline.h"
@@ -41,11 +42,13 @@ struct ValidateArrayVisitor {
     ARROW_RETURN_IF(array.data()->buffers.size() != 2,
                     Status::Invalid("number of buffers is != 2"));
 
-    if (array.length() > 0 && array.data()->buffers[1] == nullptr) {
-      return Status::Invalid("values buffer is null");
-    }
-    if (array.length() > 0 && array.values() == nullptr) {
-      return Status::Invalid("values is null");
+    if (array.length() > 0) {
+      if (array.data()->buffers[1] == nullptr) {
+        return Status::Invalid("values buffer is null");
+      }
+      if (array.values() == nullptr) {
+        return Status::Invalid("values is null");
+      }
     }
     return Status::OK();
   }
@@ -265,7 +268,8 @@ struct ValidateArrayVisitor {
 
     auto value_offsets = array.value_offsets();
     if (value_offsets == nullptr) {
-      if (array.length() != 0) {
+      // For length 0, an empty offsets array seems accepted as a special case (ARROW-544)
+      if (array.length() > 0) {
         return Status::Invalid("non-empty array but value_offsets_ is null");
       }
       return Status::OK();

diff --git a/cpp/src/arrow/c/CMakeLists.txt b/cpp/src/arrow/c/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+add_arrow_test(bridge_test PREFIX "arrow-c")
+
+add_arrow_benchmark(bridge_benchmark)
+
+arrow_install_all_headers("arrow/c")
diff --git a/cpp/src/arrow/c/abi.h b/cpp/src/arrow/c/abi.h
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ARROW_FLAG_DICTIONARY_ORDERED 1  // bit 0; each flag is a distinct single bit
+#define ARROW_FLAG_NULLABLE 2  // bit 1
+#define ARROW_FLAG_MAP_KEYS_SORTED 4  // bit 2
+
+struct ArrowSchema {  // C-compatible description of an array's type
+  // Array type description
+  const char* format;  // NOTE(review): presumably encodes the data type; confirm
+  const char* name;
+  const char* metadata;
+  int64_t flags;  // NOTE(review): presumably a bitwise OR of ARROW_FLAG_*; confirm
+  int64_t n_children;  // NOTE(review): presumably the entry count of `children`
+  struct ArrowSchema** children;
+  struct ArrowSchema* dictionary;
+
+  // Release callback (NOTE(review): who calls it, and when, is not stated in this header)
+  void (*release)(struct ArrowSchema*);
+  // Opaque producer-specific data
+  void* private_data;
+};
+
+struct ArrowArray {  // C-compatible description of an array's data
+  // Array data description
+  int64_t length;
+  int64_t null_count;
+  int64_t offset;
+  int64_t n_buffers;  // NOTE(review): presumably the entry count of `buffers`
+  int64_t n_children;  // NOTE(review): presumably the entry count of `children`
+  const void** buffers;
+  struct ArrowArray** children;
+  struct ArrowArray* dictionary;
+
+  // Release callback (NOTE(review): who calls it, and when, is not stated in this header)
+  void (*release)(struct ArrowArray*);
+  // Opaque producer-specific data
+  void* private_data;
+};
+
+#ifdef __cplusplus
+}
+#endif