Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
8e816ec
ARROW-2145: [Python] Decimal conversion not working for NaN values
cpcloud Feb 14, 2018
f562378
IWYU
cpcloud Feb 23, 2018
8893a45
Revert header change
cpcloud Feb 23, 2018
0665f6e
Revert test change
cpcloud Feb 23, 2018
e6ac864
Install libboost-regex-dev on travis
cpcloud Feb 23, 2018
50e35d6
Use shared boost on parquet CI build
cpcloud Feb 26, 2018
8be22a6
Install boost with c++11 option
cpcloud Feb 26, 2018
7c7270a
Show boost install
cpcloud Feb 26, 2018
77a41ee
Install boost first
cpcloud Feb 26, 2018
4c74c63
NULLPTR to nullptr
cpcloud Feb 27, 2018
d905202
DCHECK_OK
cpcloud Feb 27, 2018
281f798
DCHECK_OK
cpcloud Feb 27, 2018
1df6923
DCHECK_OK
cpcloud Feb 27, 2018
db664f2
DCHECK_Ok
cpcloud Feb 27, 2018
092a962
Fix order of operands
cpcloud Feb 27, 2018
418754f
Check return value of PyList_SetItem
cpcloud Feb 27, 2018
b24ff25
Add DecimalMetadata::Update test for ignoring NaN values
cpcloud Feb 27, 2018
3190b1a
Ignore nans in decimal metadata update
cpcloud Feb 27, 2018
a05b316
Refactor import decimal and acquire the gil before importing
cpcloud Feb 27, 2018
4e6db3c
Formatting
cpcloud Feb 27, 2018
29e1ebc
boost osx debugging
cpcloud Feb 27, 2018
b4bcfd9
DCHECK_OK for release builds
cpcloud Feb 28, 2018
78cbf51
More script debugging
cpcloud Feb 28, 2018
03ee999
Fix boost root
cpcloud Feb 28, 2018
ae5db5f
Perms
cpcloud Feb 28, 2018
99505a9
Silence cmake complaints about boost version
cpcloud Feb 28, 2018
00be578
Add tests to accommodate decimal values
cpcloud Feb 28, 2018
ab3e4a5
Brewfile
cpcloud Feb 28, 2018
0d45688
Pass version as argument
cpcloud Mar 1, 2018
1fc2a96
Args must be a ruby Hash
cpcloud Mar 1, 2018
97fcb96
Make sure we only install if glibc is affected
cpcloud Mar 1, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ matrix:
- $TRAVIS_BUILD_DIR/ci/travis_before_script_c_glib.sh
script:
- $TRAVIS_BUILD_DIR/ci/travis_script_c_glib.sh
# [OS X] C++ & glib w/ XCode 8.3 & autotools
# [OS X] C++ & glib w/ XCode 8.3 & autotools & homebrew
- compiler: clang
osx_image: xcode8.3
os: osx
Expand All @@ -185,7 +185,8 @@ matrix:
- BUILD_SYSTEM=autotools
before_script:
- if [ $ARROW_CI_C_GLIB_AFFECTED != "1" ]; then exit; fi
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library
- $TRAVIS_BUILD_DIR/ci/travis_install_osx.sh
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library --homebrew
- $TRAVIS_BUILD_DIR/ci/travis_before_script_c_glib.sh
script:
- $TRAVIS_BUILD_DIR/ci/travis_script_c_glib.sh
Expand Down
2 changes: 1 addition & 1 deletion c_glib/Brewfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.

brew "autoconf-archive"
brew "boost"
brew "boost", args: ["1.65.0"]
brew "ccache"
brew "cmake"
brew "git"
Expand Down
4 changes: 1 addition & 3 deletions ci/travis_before_script_c_glib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ set -ex

source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh

if [ $TRAVIS_OS_NAME = "osx" ]; then
brew update && brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile
else # Linux
if [ $TRAVIS_OS_NAME = "linux" ]; then
sudo apt-get install -y -q gtk-doc-tools autoconf-archive libgirepository1.0-dev
fi

Expand Down
24 changes: 20 additions & 4 deletions ci/travis_before_script_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,22 @@ set -ex

source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh

if [ "$1" == "--only-library" ]; then
only_library_mode=yes
else
only_library_mode=no
only_library_mode=no
using_homebrew=no

while true; do
case "$1" in
--only-library)
only_library_mode=yes
shift ;;
--homebrew)
using_homebrew=yes
shift ;;
*) break ;;
esac
done

if [ "$only_library_mode" == "no" ]; then
source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh
fi

Expand Down Expand Up @@ -78,6 +90,10 @@ if [ $TRAVIS_OS_NAME == "linux" ]; then
-DBUILD_WARNING_LEVEL=$ARROW_BUILD_WARNING_LEVEL \
$ARROW_CPP_DIR
else
if [ "$using_homebrew" = "yes" ]; then
# build against homebrew's boost if we're using it
export BOOST_ROOT=/usr/local/opt/boost
fi
cmake $CMAKE_COMMON_FLAGS \
$CMAKE_OSX_FLAGS \
-DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \
Expand Down
2 changes: 1 addition & 1 deletion ci/travis_build_parquet_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ cmake \
-GNinja \
-DCMAKE_BUILD_TYPE=debug \
-DCMAKE_INSTALL_PREFIX=$ARROW_PYTHON_PARQUET_HOME \
-DPARQUET_BOOST_USE_SHARED=off \
-DPARQUET_BOOST_USE_SHARED=on \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the rationale for this? Is it the symbol linking issue?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`import pyarrow.parquet` was segfaulting; I assumed that was because we're statically linking boost in the parquet build and dynamically linking it in the arrow build. This only shows up when using the regex library.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. We should be consistent about which we do across the libraries. This is part of why I wish we were building all these libraries in a monorepo setting.

-DPARQUET_BUILD_BENCHMARKS=off \
-DPARQUET_BUILD_EXECUTABLES=off \
-DPARQUET_BUILD_TESTS=off \
Expand Down
2 changes: 1 addition & 1 deletion ci/travis_install_linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

sudo apt-get install -y -q \
gdb ccache libboost-dev libboost-filesystem-dev \
libboost-system-dev libjemalloc-dev
libboost-system-dev libboost-regex-dev libjemalloc-dev

if [ "$ARROW_TRAVIS_VALGRIND" == "1" ]; then
sudo apt-get install -y -q valgrind
Expand Down
23 changes: 23 additions & 0 deletions ci/travis_install_osx.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Fail fast and echo each command, matching the other ci/travis_*.sh scripts.
# Without this, a failed `brew update` would be ignored and `brew bundle`
# would still run.
set -ex

# Install the Homebrew packages listed in c_glib/Brewfile, but only when
# ARROW_CI_C_GLIB_AFFECTED is "1"; otherwise this script does nothing.
if [ "$ARROW_CI_C_GLIB_AFFECTED" = "1" ]; then
brew update
# Quote the path so a build directory containing spaces is passed intact.
brew bundle --file="$TRAVIS_BUILD_DIR/c_glib/Brewfile"
fi
6 changes: 4 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -574,11 +574,13 @@ set(ARROW_LINK_LIBS

set(ARROW_SHARED_PRIVATE_LINK_LIBS
${BOOST_SYSTEM_LIBRARY}
${BOOST_FILESYSTEM_LIBRARY})
${BOOST_FILESYSTEM_LIBRARY}
${BOOST_REGEX_LIBRARY})

set(ARROW_STATIC_PRIVATE_LINK_LIBS
${BOOST_SYSTEM_LIBRARY}
${BOOST_FILESYSTEM_LIBRARY})
${BOOST_FILESYSTEM_LIBRARY}
${BOOST_REGEX_LIBRARY})

if (NOT MSVC)
set(ARROW_LINK_LIBS
Expand Down
22 changes: 18 additions & 4 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -157,16 +157,20 @@ if (ARROW_BOOST_VENDORED)
"${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_system${CMAKE_STATIC_LIBRARY_SUFFIX}")
set(BOOST_STATIC_FILESYSTEM_LIBRARY
"${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_filesystem${CMAKE_STATIC_LIBRARY_SUFFIX}")
set(BOOST_STATIC_REGEX_LIBRARY
"${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_regex${CMAKE_STATIC_LIBRARY_SUFFIX}")
set(BOOST_SYSTEM_LIBRARY "${BOOST_STATIC_SYSTEM_LIBRARY}")
set(BOOST_FILESYSTEM_LIBRARY "${BOOST_STATIC_FILESYSTEM_LIBRARY}")
set(BOOST_REGEX_LIBRARY "${BOOST_STATIC_REGEX_LIBRARY}")
if (ARROW_BOOST_HEADER_ONLY)
set(BOOST_BUILD_PRODUCTS)
set(BOOST_CONFIGURE_COMMAND "")
set(BOOST_BUILD_COMMAND "")
else()
set(BOOST_BUILD_PRODUCTS
${BOOST_SYSTEM_LIBRARY}
${BOOST_FILESYSTEM_LIBRARY})
${BOOST_FILESYSTEM_LIBRARY}
${BOOST_REGEX_LIBRARY})
set(BOOST_CONFIGURE_COMMAND
"./bootstrap.sh"
"--prefix=${BOOST_PREFIX}"
Expand Down Expand Up @@ -210,16 +214,19 @@ else()
if (ARROW_BOOST_HEADER_ONLY)
find_package(Boost REQUIRED)
else()
find_package(Boost COMPONENTS system filesystem REQUIRED)
find_package(Boost COMPONENTS system filesystem regex REQUIRED)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
set(BOOST_SHARED_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_DEBUG})
else()
set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
set(BOOST_SHARED_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_RELEASE})
endif()
set(BOOST_SYSTEM_LIBRARY boost_system_shared)
set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared)
set(BOOST_REGEX_LIBRARY boost_regex_shared)
endif()
else()
# Find static boost headers and libs
Expand All @@ -228,16 +235,19 @@ else()
if (ARROW_BOOST_HEADER_ONLY)
find_package(Boost REQUIRED)
else()
find_package(Boost COMPONENTS system filesystem REQUIRED)
find_package(Boost COMPONENTS system filesystem regex REQUIRED)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
set(BOOST_STATIC_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_DEBUG})
else()
set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
set(BOOST_STATIC_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_RELEASE})
endif()
set(BOOST_SYSTEM_LIBRARY boost_system_static)
set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static)
set(BOOST_REGEX_LIBRARY boost_regex_static)
endif()
endif()
endif()
Expand All @@ -254,7 +264,11 @@ if (NOT ARROW_BOOST_HEADER_ONLY)
STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}"
SHARED_LIB "${BOOST_SHARED_FILESYSTEM_LIBRARY}")

SET(ARROW_BOOST_LIBS boost_system boost_filesystem)
ADD_THIRDPARTY_LIB(boost_regex
STATIC_LIB "${BOOST_STATIC_REGEX_LIBRARY}"
SHARED_LIB "${BOOST_SHARED_REGEX_LIBRARY}")

SET(ARROW_BOOST_LIBS boost_system boost_filesystem boost_regex)
endif()

include_directories(SYSTEM ${Boost_INCLUDE_DIR})
Expand Down
13 changes: 7 additions & 6 deletions cpp/src/arrow/python/arrow_to_pandas.cc
Original file line number Diff line number Diff line change
Expand Up @@ -640,11 +640,11 @@ static Status ConvertTimes(PandasOptions options, const ChunkedArray& data,
static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data,
PyObject** out_values) {
PyAcquireGIL lock;
OwnedRef decimal_ref;
OwnedRef Decimal_ref;
RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_ref));
RETURN_NOT_OK(internal::ImportFromModule(decimal_ref, "Decimal", &Decimal_ref));
PyObject* Decimal = Decimal_ref.obj();
OwnedRef decimal;
OwnedRef Decimal;
RETURN_NOT_OK(internal::ImportModule("decimal", &decimal));
RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal));
PyObject* decimal_constructor = Decimal.obj();

for (int c = 0; c < data.num_chunks(); c++) {
const auto& arr = static_cast<const arrow::Decimal128Array&>(*data.chunk(c));
Expand All @@ -654,7 +654,8 @@ static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data,
Py_INCREF(Py_None);
*out_values++ = Py_None;
} else {
*out_values++ = internal::DecimalFromString(Decimal, arr.FormatValue(i));
*out_values++ =
internal::DecimalFromString(decimal_constructor, arr.FormatValue(i));
RETURN_IF_PYERROR();
}
}
Expand Down
40 changes: 30 additions & 10 deletions cpp/src/arrow/python/builtin_convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,15 @@ class ScalarVisitor {
timestamp_count_(0),
float_count_(0),
binary_count_(0),
unicode_count_(0) {}
unicode_count_(0),
decimal_count_(0),
max_decimal_metadata_(std::numeric_limits<int32_t>::min(),
std::numeric_limits<int32_t>::min()),
decimal_type_() {
PyAcquireGIL lock;
Status status = internal::ImportDecimalType(&decimal_type_);
DCHECK_OK(status);
}

Status Visit(PyObject* obj) {
++total_count_;
Expand Down Expand Up @@ -111,10 +119,13 @@ class ScalarVisitor {
ss << type->ToString();
return Status::Invalid(ss.str());
}
} else if (PyObject_IsInstance(obj, decimal_type_.obj())) {
RETURN_NOT_OK(max_decimal_metadata_.Update(obj));
++decimal_count_;
} else {
// TODO(wesm): accumulate error information somewhere
static std::string supported_types =
"bool, float, integer, date, datetime, bytes, unicode";
"bool, float, integer, date, datetime, bytes, unicode, decimal";
std::stringstream ss;
ss << "Error inferring Arrow data type for collection of Python objects. ";
RETURN_NOT_OK(InvalidConversion(obj, supported_types, &ss));
Expand All @@ -125,7 +136,9 @@ class ScalarVisitor {

std::shared_ptr<DataType> GetType() {
// TODO(wesm): handling mixed-type cases
if (float_count_) {
if (decimal_count_) {
return decimal(max_decimal_metadata_.precision(), max_decimal_metadata_.scale());
} else if (float_count_) {
return float64();
} else if (int_count_) {
// TODO(wesm): tighter type later
Expand Down Expand Up @@ -157,8 +170,13 @@ class ScalarVisitor {
int64_t float_count_;
int64_t binary_count_;
int64_t unicode_count_;
int64_t decimal_count_;

internal::DecimalMetadata max_decimal_metadata_;

// Place to accumulate errors
// std::vector<Status> errors_;
OwnedRefNoGIL decimal_type_;
};

static constexpr int MAX_NESTING_LEVELS = 32;
Expand Down Expand Up @@ -379,17 +397,14 @@ class TypedConverter : public SeqConverter {
BuilderType* typed_builder_;
};

// We use the CRTP trick here to devirtualize the AppendItem() and AppendNull()
// We use the CRTP trick here to devirtualize the AppendItem(), AppendNull(), and IsNull()
// method calls.
template <typename BuilderType, class Derived>
class TypedConverterVisitor : public TypedConverter<BuilderType> {
public:
Status AppendSingle(PyObject* obj) override {
if (obj == Py_None) {
return static_cast<Derived*>(this)->AppendNull();
} else {
return static_cast<Derived*>(this)->AppendItem(obj);
}
auto self = static_cast<Derived*>(this);
return self->IsNull(obj) ? self->AppendNull() : self->AppendItem(obj);
}

Status AppendMultiple(PyObject* obj, int64_t size) override {
Expand All @@ -409,6 +424,7 @@ class TypedConverterVisitor : public TypedConverter<BuilderType> {

// Append a missing item (default implementation)
Status AppendNull() { return this->typed_builder_->AppendNull(); }
bool IsNull(PyObject* obj) const { return obj == Py_None; }
};

class NullConverter : public TypedConverterVisitor<NullBuilder, NullConverter> {
Expand Down Expand Up @@ -830,12 +846,16 @@ class DecimalConverter
public:
// Append a non-missing item
Status AppendItem(PyObject* obj) {
/// TODO(phillipc): Check for nan?
Decimal128 value;
const auto& type = static_cast<const DecimalType&>(*typed_builder_->type());
RETURN_NOT_OK(internal::DecimalFromPythonDecimal(obj, type, &value));
return typed_builder_->Append(value);
}

bool IsNull(PyObject* obj) const {
return obj == Py_None || obj == numpy_nan || internal::PyFloat_isnan(obj) ||
(internal::PyDecimal_Check(obj) && internal::PyDecimal_ISNAN(obj));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ugh, Python, what did we do to deserve this? =)

}
};

// Dynamic constructor for sequence converters
Expand Down
Loading