From 82c4d2d832c47ba37bf9c5eec3b251dec4555958 Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Tue, 27 Dec 2016 00:02:37 -0500
Subject: [PATCH 1/5] Remove ARROW_HDFS option, always build the module

Change-Id: I7212f421596eba5697142978fade1c057cf01753
---
 ci/travis_before_script_cpp.sh    |  2 --
 cpp/CMakeLists.txt                |  4 ---
 cpp/src/arrow/io/CMakeLists.txt   | 56 +++++++++++--------------------
 cpp/src/arrow/io/hdfs-internal.cc |  4 ---
 4 files changed, 20 insertions(+), 46 deletions(-)

diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index 20307736e67..73bdaeb81fe 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -26,8 +26,6 @@ CPP_DIR=$TRAVIS_BUILD_DIR/cpp
 
 CMAKE_COMMON_FLAGS="\
 -DARROW_BUILD_BENCHMARKS=ON \
--DARROW_PARQUET=OFF \
--DARROW_HDFS=ON \
 -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL"
 
 if [ $TRAVIS_OS_NAME == "linux" ]; then
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 93e9853df89..47ca2fd3765 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -74,10 +74,6 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
     "Build the Arrow IPC extensions"
     ON)
 
-  option(ARROW_HDFS
-    "Build the Arrow IO extensions for the Hadoop file system"
-    OFF)
-
   option(ARROW_BOOST_USE_SHARED
     "Rely on boost shared libraries where relevant"
     ON)
diff --git a/cpp/src/arrow/io/CMakeLists.txt b/cpp/src/arrow/io/CMakeLists.txt
index 2062cd43b7b..1e65a1a46ab 100644
--- a/cpp/src/arrow/io/CMakeLists.txt
+++ b/cpp/src/arrow/io/CMakeLists.txt
@@ -45,50 +45,30 @@ set(ARROW_IO_TEST_LINK_LIBS
 
 set(ARROW_IO_SRCS
   file.cc
+  hdfs.cc
+  hdfs-internal.cc
   interfaces.cc
   memory.cc
 )
 
-if(ARROW_HDFS)
-  if(NOT THIRDPARTY_DIR)
-    message(FATAL_ERROR "THIRDPARTY_DIR not set")
-  endif()
-
-  if (DEFINED ENV{HADOOP_HOME})
-    set(HADOOP_HOME $ENV{HADOOP_HOME})
-    if (NOT EXISTS "${HADOOP_HOME}/include/hdfs.h")
-      message(STATUS "Did not find hdfs.h in expected location, using vendored one")
-      set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
-    endif()
-  else()
+# HDFS thirdparty setup
+if (DEFINED ENV{HADOOP_HOME})
+  set(HADOOP_HOME $ENV{HADOOP_HOME})
+  if (NOT EXISTS "${HADOOP_HOME}/include/hdfs.h")
+    message(STATUS "Did not find hdfs.h in expected location, using vendored one")
     set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
   endif()
+else()
+  set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
+endif()
 
-  set(HDFS_H_PATH "${HADOOP_HOME}/include/hdfs.h")
-  if (NOT EXISTS ${HDFS_H_PATH})
-    message(FATAL_ERROR "Did not find hdfs.h at ${HDFS_H_PATH}")
-  endif()
-  message(STATUS "Found hdfs.h at: " ${HDFS_H_PATH})
-  message(STATUS "Building libhdfs shim component")
-
-  include_directories(SYSTEM "${HADOOP_HOME}/include")
-
-  set(ARROW_HDFS_SRCS
-    hdfs.cc
-    hdfs-internal.cc)
-
-  set_property(SOURCE ${ARROW_HDFS_SRCS}
-    APPEND_STRING PROPERTY
-    COMPILE_FLAGS "-DHAS_HADOOP")
-
-  set(ARROW_IO_SRCS
-    ${ARROW_HDFS_SRCS}
-    ${ARROW_IO_SRCS})
-
-  ADD_ARROW_TEST(io-hdfs-test)
-  ARROW_TEST_LINK_LIBRARIES(io-hdfs-test
-    ${ARROW_IO_TEST_LINK_LIBS})
+set(HDFS_H_PATH "${HADOOP_HOME}/include/hdfs.h")
+if (NOT EXISTS ${HDFS_H_PATH})
+  message(FATAL_ERROR "Did not find hdfs.h at ${HDFS_H_PATH}")
 endif()
+message(STATUS "Found hdfs.h at: " ${HDFS_H_PATH})
+
+include_directories(SYSTEM "${HADOOP_HOME}/include")
 
 add_library(arrow_io SHARED
   ${ARROW_IO_SRCS}
@@ -119,6 +99,10 @@ ADD_ARROW_TEST(io-file-test)
 ARROW_TEST_LINK_LIBRARIES(io-file-test
   ${ARROW_IO_TEST_LINK_LIBS})
 
+ADD_ARROW_TEST(io-hdfs-test)
+ARROW_TEST_LINK_LIBRARIES(io-hdfs-test
+  ${ARROW_IO_TEST_LINK_LIBS})
+
 ADD_ARROW_TEST(io-memory-test)
 ARROW_TEST_LINK_LIBRARIES(io-memory-test
   ${ARROW_IO_TEST_LINK_LIBS})
diff --git a/cpp/src/arrow/io/hdfs-internal.cc b/cpp/src/arrow/io/hdfs-internal.cc
index 7094785de02..13c7d05a570 100644
--- a/cpp/src/arrow/io/hdfs-internal.cc
+++ b/cpp/src/arrow/io/hdfs-internal.cc
@@ -28,8 +28,6 @@
 // This software may be modified and distributed under the terms
 // of the BSD license. See the LICENSE file for details.
 
-#ifdef HAS_HADOOP
-
 #ifndef _WIN32
 #include <dlfcn.h>
 #else
@@ -586,5 +584,3 @@ Status ARROW_EXPORT ConnectLibHdfs3(LibHdfsShim** driver) {
 
 }  // namespace io
 }  // namespace arrow
-
-#endif  // HAS_HADOOP

From ea8fb9dd05c2a65ccf66d2d0e826f50b29572b9b Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Tue, 27 Dec 2016 12:03:17 -0500
Subject: [PATCH 2/5] Various Win32 compilation fixes

---
 cpp/src/arrow/io/hdfs-internal.cc | 22 +++++----------------
 cpp/src/arrow/io/hdfs-internal.h  | 10 ++++++++++
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/cpp/src/arrow/io/hdfs-internal.cc b/cpp/src/arrow/io/hdfs-internal.cc
index 13c7d05a570..e4b2cd55978 100644
--- a/cpp/src/arrow/io/hdfs-internal.cc
+++ b/cpp/src/arrow/io/hdfs-internal.cc
@@ -28,19 +28,7 @@
 // This software may be modified and distributed under the terms
 // of the BSD license. See the LICENSE file for details.
 
-#ifndef _WIN32
-#include <dlfcn.h>
-#else
-#include <winsock2.h>
-#include <windows.h>
-
-// TODO(wesm): address when/if we add windows support
-// #include
-#endif
-
-extern "C" {
-#include <hdfs.h>
-}
+#include "arrow/io/hdfs-internal.h"
 
 #include
 #include
@@ -51,7 +39,6 @@ extern "C" {
 
 #include <mutex>  // NOLINT
 
-#include "arrow/io/hdfs-internal.h"
 #include "arrow/status.h"
 #include "arrow/util/visibility.h"
 
@@ -263,7 +250,8 @@ static inline void* GetLibrarySymbol(void* handle, const char* symbol) {
   return dlsym(handle, symbol);
 #else
 
-  void* ret = reinterpret_cast<void*>(GetProcAddress(handle, symbol));
+  void* ret = reinterpret_cast<void*>(
+      GetProcAddress(reinterpret_cast<HINSTANCE>(handle), symbol));
   if (ret == NULL) {
     // logstream(LOG_INFO) << "GetProcAddress error: "
     //     << get_last_err_str(GetLastError()) << std::endl;
@@ -535,7 +523,7 @@ Status LibHdfsShim::GetRequiredSymbols() {
   return Status::OK();
 }
 
-Status ARROW_EXPORT ConnectLibHdfs(LibHdfsShim** driver) {
+Status ConnectLibHdfs(LibHdfsShim** driver) {
   static std::mutex lock;
   std::lock_guard<std::mutex> guard(lock);
 
@@ -560,7 +548,7 @@ Status ARROW_EXPORT ConnectLibHdfs(LibHdfsShim** driver) {
   return shim->GetRequiredSymbols();
 }
 
-Status ARROW_EXPORT ConnectLibHdfs3(LibHdfsShim** driver) {
+Status ConnectLibHdfs3(LibHdfsShim** driver) {
   static std::mutex lock;
   std::lock_guard<std::mutex> guard(lock);
 
diff --git a/cpp/src/arrow/io/hdfs-internal.h b/cpp/src/arrow/io/hdfs-internal.h
index 0ff118a8f57..df6ab90b49a 100644
--- a/cpp/src/arrow/io/hdfs-internal.h
+++ b/cpp/src/arrow/io/hdfs-internal.h
@@ -18,6 +18,16 @@
 #ifndef ARROW_IO_HDFS_INTERNAL
 #define ARROW_IO_HDFS_INTERNAL
 
+#ifndef _WIN32
+#include <dlfcn.h>
+#else
+#include <winsock2.h>
+#include <windows.h>
+
+// TODO(wesm): address when/if we add windows support
+// #include
+#endif
+
 #include <hdfs.h>
 
 namespace arrow {

From 5e53ddb74aec17b2ed53d696c24c47ece74855ba Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Tue, 27 Dec 2016 15:52:00 -0500
Subject: [PATCH 3/5] Visibility fix

Change-Id: Icfeabb74913a3e9f0edbc4a67a62d4fdcaed54b0
---
 cpp/src/arrow/io/hdfs-internal.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/io/hdfs-internal.h b/cpp/src/arrow/io/hdfs-internal.h
index df6ab90b49a..ac0da23fb7d 100644
--- a/cpp/src/arrow/io/hdfs-internal.h
+++ b/cpp/src/arrow/io/hdfs-internal.h
@@ -30,6 +30,8 @@
 #include <hdfs.h>
 
+#include "arrow/util/visibility.h"
+
 namespace arrow {
 
 class Status;
@@ -204,8 +206,9 @@ struct LibHdfsShim {
   Status GetRequiredSymbols();
 };
 
-Status ConnectLibHdfs(LibHdfsShim** driver);
-Status ConnectLibHdfs3(LibHdfsShim** driver);
+// TODO(wesm): Remove these exports when we are linking statically
+Status ARROW_EXPORT ConnectLibHdfs(LibHdfsShim** driver);
+Status ARROW_EXPORT ConnectLibHdfs3(LibHdfsShim** driver);
 
 }  // namespace io
 }  // namespace arrow

From d0cc3766dbf212819f23e1b2b8d856f0b9783de1 Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Tue, 27 Dec 2016 17:12:37 -0500
Subject: [PATCH 4/5] Add NOMINMAX windows workaround

Change-Id: I2390e2e964be3c79dc1d43b40a8fa45ca89e8889
---
 cpp/src/arrow/io/hdfs-internal.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/cpp/src/arrow/io/hdfs-internal.h b/cpp/src/arrow/io/hdfs-internal.h
index ac0da23fb7d..8f9a06758cb 100644
--- a/cpp/src/arrow/io/hdfs-internal.h
+++ b/cpp/src/arrow/io/hdfs-internal.h
@@ -21,6 +21,11 @@
 #ifndef _WIN32
 #include <dlfcn.h>
 #else
+
+// Windows defines min and max macros that mess up std::min/max
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
 #include <winsock2.h>
 #include <windows.h>
 

From e793fd1f453dd0f81e789b19c9ed635e9de8cbe4 Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Wed, 28 Dec 2016 08:21:56 -0500
Subject: [PATCH 5/5] Use string() instead of native() for file paths because
 Windows uses UTF-16 native encoding

Change-Id: If6bdf709168504db90331e422c984608efa75427
---
 cpp/src/arrow/io/io-hdfs-test.cc           | 2 +-
 cpp/src/arrow/ipc/json-integration-test.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/io/io-hdfs-test.cc b/cpp/src/arrow/io/io-hdfs-test.cc
index 4ef47b8babe..72e0ba8f298 100644
--- a/cpp/src/arrow/io/io-hdfs-test.cc
+++ b/cpp/src/arrow/io/io-hdfs-test.cc
@@ -79,7 +79,7 @@ class TestHdfsClient : public ::testing::Test {
     client_ = nullptr;
 
     scratch_dir_ =
-        boost::filesystem::unique_path("/tmp/arrow-hdfs/scratch-%%%%").native();
+        boost::filesystem::unique_path("/tmp/arrow-hdfs/scratch-%%%%").string();
 
     loaded_driver_ = false;
 
diff --git a/cpp/src/arrow/ipc/json-integration-test.cc b/cpp/src/arrow/ipc/json-integration-test.cc
index 5e593560f8c..757e6c00ab2 100644
--- a/cpp/src/arrow/ipc/json-integration-test.cc
+++ b/cpp/src/arrow/ipc/json-integration-test.cc
@@ -221,7 +221,7 @@ Status RunCommand(const std::string& json_path, const std::string& arrow_path,
 }
 
 static std::string temp_path() {
-  return (fs::temp_directory_path() / fs::unique_path()).native();
+  return (fs::temp_directory_path() / fs::unique_path()).string();
 }
 
 class TestJSONIntegration : public ::testing::Test {
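
Note on the shim pattern exercised by PATCH 1/5 and PATCH 2/5: the libhdfs shim is now always compiled, and it resolves libhdfs symbols at runtime rather than linking against Hadoop at build time. Below is a minimal, self-contained sketch of that load-and-resolve pattern; it is illustrative only and not Arrow's implementation. The function names, the "libhdfs.so" library name, and the signature used for hdfsConnect are placeholders, and real code would also handle search paths and error reporting the way LibHdfsShim does.

// Sketch: resolve a symbol from a dynamically loaded library, in the spirit
// of the LibHdfsShim. Placeholder names; not Arrow's API.
#include <cstdio>

#ifndef _WIN32
#include <dlfcn.h>
#else
#ifndef NOMINMAX
#define NOMINMAX  // keep <windows.h> from defining min/max macros (PATCH 4/5)
#endif
#include <windows.h>
#endif

static void* LoadSharedLibrary(const char* name) {
#ifndef _WIN32
  return dlopen(name, RTLD_NOW | RTLD_LOCAL);
#else
  return reinterpret_cast<void*>(LoadLibraryA(name));
#endif
}

static void* GetSymbol(void* handle, const char* symbol) {
  if (handle == nullptr) return nullptr;
#ifndef _WIN32
  return dlsym(handle, symbol);
#else
  // GetProcAddress takes a module handle, hence the extra cast that
  // PATCH 2/5 adds to GetLibrarySymbol.
  return reinterpret_cast<void*>(
      GetProcAddress(reinterpret_cast<HMODULE>(handle), symbol));
#endif
}

int main() {
  void* handle = LoadSharedLibrary("libhdfs.so");  // hypothetical library name
  if (handle == nullptr) {
    std::fprintf(stderr, "could not load libhdfs\n");
    return 1;
  }
  // hdfsConnect is a real libhdfs entry point; binding it through a function
  // pointer is what the shim does for every symbol it needs.
  using ConnectFn = void* (*)(const char*, unsigned short);
  auto connect = reinterpret_cast<ConnectFn>(GetSymbol(handle, "hdfsConnect"));
  std::fprintf(stdout, "hdfsConnect %s\n", connect ? "resolved" : "missing");
  return 0;
}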
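Note on PATCH 3/5: ARROW_EXPORT is re-added on the ConnectLibHdfs/ConnectLibHdfs3 declarations because the shim is built into a shared library and the symbols must be visible to the test executables. The macro itself is not shown in these diffs; the sketch below is only the usual shape of such a visibility macro, an assumption about the general pattern rather than a copy of arrow/util/visibility.h.

// Sketch of a typical export/visibility macro (not Arrow's actual header).
#if defined(_WIN32)
#ifdef MYLIB_EXPORTING             // defined while building the DLL itself
#define MYLIB_EXPORT __declspec(dllexport)
#else                              // defined for consumers of the DLL
#define MYLIB_EXPORT __declspec(dllimport)
#endif
#else
// On ELF/Mach-O builds with -fvisibility=hidden, mark public symbols default.
#define MYLIB_EXPORT __attribute__((visibility("default")))
#endif

// Usage mirrors the patched declarations, e.g.:
// Status MYLIB_EXPORT ConnectLibHdfs(LibHdfsShim** driver);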
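Note on PATCH 5/5: boost::filesystem::path::native() returns the platform's native string type, which is std::wstring (UTF-16) on Windows, while path::string() always yields a std::string, so it assigns cleanly to the std::string members and return values used in the tests. A small stand-alone illustration, assuming Boost.Filesystem is available:

// Why .string() rather than .native(): the native accessor changes type by
// platform, the string() accessor does not.
#include <boost/filesystem.hpp>
#include <iostream>
#include <string>

int main() {
  boost::filesystem::path p =
      boost::filesystem::temp_directory_path() / boost::filesystem::unique_path();

  // Portable: .string() returns std::string on every platform.
  std::string portable = p.string();

  // p.native() returns path::string_type, which is std::wstring on Windows
  // and std::string elsewhere, so assigning it to std::string only compiles
  // on POSIX systems.
  std::cout << portable << std::endl;
  return 0;
}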