From bbbd8c1029d5435dc2600f273ebf283061a66314 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 26 Jul 2017 12:18:06 -0400 Subject: [PATCH 1/5] Docker HDFS testing scripts, use hdfs-client.xml from Apache HAWQ (incubating) --- python/testing/README.md | 21 ++ python/testing/functions.sh | 71 ++++ python/testing/hdfs/Dockerfile | 40 +++ python/testing/hdfs/libhdfs3-hdfs-client.xml | 332 ++++++++++++++++++ .../testing/hdfs/restart_docker_container.sh | 37 ++ python/testing/hdfs/run_tests.sh | 34 ++ python/testing/set_env_common.sh | 71 ++++ python/testing/setup_toolchain.sh | 65 ++++ python/testing/test_hdfs.sh | 24 ++ 9 files changed, 695 insertions(+) create mode 100644 python/testing/README.md create mode 100644 python/testing/functions.sh create mode 100644 python/testing/hdfs/Dockerfile create mode 100644 python/testing/hdfs/libhdfs3-hdfs-client.xml create mode 100644 python/testing/hdfs/restart_docker_container.sh create mode 100755 python/testing/hdfs/run_tests.sh create mode 100644 python/testing/set_env_common.sh create mode 100644 python/testing/setup_toolchain.sh create mode 100755 python/testing/test_hdfs.sh diff --git a/python/testing/README.md b/python/testing/README.md new file mode 100644 index 00000000000..2498445fe92 --- /dev/null +++ b/python/testing/README.md @@ -0,0 +1,21 @@ + + +# Testing tools for odds and ends + +## Testing HDFS file interface + +```shell +./test_hdfs.sh +``` \ No newline at end of file diff --git a/python/testing/functions.sh b/python/testing/functions.sh new file mode 100644 index 00000000000..0583685182c --- /dev/null +++ b/python/testing/functions.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http:#www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +use_gcc() { + export CC=gcc-4.9 + export CXX=g++-4.9 +} + +use_clang() { + export CC=clang-4.0 + export CXX=clang++-4.0 +} + +build_arrow() { + mkdir -p $ARROW_CPP_BUILD_DIR + pushd $ARROW_CPP_BUILD_DIR + + cmake -GNinja \ + -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ + -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ + -DARROW_NO_DEPRECATED_API=ON \ + -DARROW_PLASMA=ON \ + -DARROW_BOOST_USE_SHARED=off \ + $ARROW_CPP_DIR + + ninja + ninja install + popd +} + +build_parquet() { + PARQUET_DIR=$BUILD_DIR/parquet + mkdir -p $PARQUET_DIR + + git clone https://github.com/apache/parquet-cpp.git $PARQUET_DIR + + pushd $PARQUET_DIR + mkdir build-dir + cd build-dir + + cmake \ + -GNinja \ + -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ + -DCMAKE_INSTALL_PREFIX=$PARQUET_HOME \ + -DPARQUET_BOOST_USE_SHARED=off \ + -DPARQUET_BUILD_BENCHMARKS=off \ + -DPARQUET_BUILD_EXECUTABLES=off \ + -DPARQUET_BUILD_TESTS=off \ + .. + + ninja + ninja install + + popd +} diff --git a/python/testing/hdfs/Dockerfile b/python/testing/hdfs/Dockerfile new file mode 100644 index 00000000000..01017069e18 --- /dev/null +++ b/python/testing/hdfs/Dockerfile @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http:#www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# TODO Replace this with a complete clean image build +FROM cpcloud86/impala:metastore + +RUN sudo apt-add-repository -y ppa:ubuntu-toolchain-r/test && \ + sudo apt-get update && \ + sudo apt-get install -y \ + gcc-4.9 \ + g++-4.9 \ + libtool \ + libjemalloc-dev \ + ccache \ + valgrind \ + gdb + +RUN wget -O - http://llvm.org/apt/llvm-snapshot.gpg.key|sudo apt-key add - && \ + sudo apt-add-repository -y \ + "deb http://llvm.org/apt/trusty/ llvm-toolchain-trusty-4.0 main" && \ + sudo apt-get update && \ + sudo apt-get install -y clang-4.0 clang-format-4.0 clang-tidy-4.0 + +RUN wget -O /tmp/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + bash /tmp/miniconda.sh -b -p /home/ubuntu/miniconda && \ + rm /tmp/miniconda.sh diff --git a/python/testing/hdfs/libhdfs3-hdfs-client.xml b/python/testing/hdfs/libhdfs3-hdfs-client.xml new file mode 100644 index 00000000000..fc651516c63 --- /dev/null +++ b/python/testing/hdfs/libhdfs3-hdfs-client.xml @@ -0,0 +1,332 @@ + + + + + + + + + + + + + + + rpc.client.timeout + 3600000 + + timeout interval of a RPC invocation in millisecond. default is 3600000. + + + + rpc.client.connect.tcpnodelay + true + + whether set socket TCP_NODELAY to true when connect to RPC server. default is true. + + + + + rpc.client.max.idle + 10000 + + the max idle time of a RPC connection in millisecond. default is 10000. + + + + + rpc.client.ping.interval + 10000 + + the interval which the RPC client send a heart beat to server. 0 means disable, default is 10000. + + + + + rpc.client.connect.timeout + 600000 + + the timeout interval in millisecond when the RPC client is trying to setup the connection. default is 600000. + + + + + rpc.client.connect.retry + 10 + + the max retry times if the RPC client fail to setup the connection to server. default is 10. + + + + + rpc.client.read.timeout + 3600000 + + the timeout interval in millisecond when the RPC client is trying to read from server. default is 3600000. + + + + + rpc.client.write.timeout + 3600000 + + the timeout interval in millisecond when the RPC client is trying to write to server. default is 3600000. + + + + + rpc.client.socket.linger.timeout + -1 + + set value to socket SO_LINGER when connect to RPC server. -1 means default OS value. default is -1. + + + + + + dfs.client.read.shortcircuit + false + + whether reading block file bypass datanode if the block and the client are on the same node. default is true. + + + + + dfs.default.replica + 1 + + the default number of replica. default is 3. + + + + + dfs.prefetchsize + 10 + + the default number of blocks which information will be prefetched. default is 10. + + + + + dfs.client.failover.max.attempts + 15 + + if multiply namenodes are configured, it is the max retry times when the dfs client try to issue a RPC call. default is 15. + + + + + dfs.default.blocksize + 134217728 + + default block size. default is 134217728. + + + + + dfs.client.log.severity + INFO + + the minimal log severity level, valid values include FATAL, ERROR, INFO, DEBUG1, DEBUG2, DEBUG3. default is INFO. + + + + + + input.connect.timeout + 600000 + + the timeout interval in millisecond when the input stream is trying to setup the connection to datanode. default is 600000. + + + + + input.read.timeout + 3600000 + + the timeout interval in millisecond when the input stream is trying to read from datanode. default is 3600000. + + + + + input.write.timeout + 3600000 + + the timeout interval in millisecond when the input stream is trying to write to datanode. default is 3600000. + + + + + input.localread.default.buffersize + 2097152 + + number of bytes of the buffer which is used to hold the data from block file and verify checksum. + it is only used when "dfs.client.read.shortcircuit" is set to true. default is 1048576. + + + + + input.localread.blockinfo.cachesize + 1000 + + the size of block file path information cache. default is 1000. + + + + + input.read.getblockinfo.retry + 3 + + the max retry times when the client fail to get block information from namenode. default is 3. + + + + + + output.replace-datanode-on-failure + false + + whether the client add new datanode into pipeline if the number of nodes in pipeline is less the specified number of replicas. default is false. + + + + + output.default.chunksize + 512 + + the number of bytes of a chunk in pipeline. default is 512. + + + + + output.default.packetsize + 65536 + + the number of bytes of a packet in pipeline. default is 65536. + + + + + output.default.write.retry + 10 + + the max retry times when the client fail to setup the pipeline. default is 10. + + + + + output.connect.timeout + 600000 + + the timeout interval in millisecond when the output stream is trying to setup the connection to datanode. default is 600000. + + + + + output.read.timeout + 3600000 + + the timeout interval in millisecond when the output stream is trying to read from datanode. default is 3600000. + + + + + output.write.timeout + 3600000 + + the timeout interval in millisecond when the output stream is trying to write to datanode. default is 3600000. + + + + + output.packetpool.size + 1024 + + the max number of packets in a file's packet pool. default is 1024. + + + + + output.close.timeout + 900000 + + the timeout interval in millisecond when close an output stream. default is 900000. + + + + + dfs.domain.socket.path + /var/lib/hadoop-hdfs/dn_socket + + Optional. This is a path to a UNIX domain socket that will be used for + communication between the DataNode and local HDFS clients. + If the string "_PORT" is present in this path, it will be replaced by the + TCP port of the DataNode. + + + + + dfs.client.use.legacy.blockreader.local + false + + Legacy short-circuit reader implementation based on HDFS-2246 is used + if this configuration parameter is true. + This is for the platforms other than Linux + where the new implementation based on HDFS-347 is not available. + + + + diff --git a/python/testing/hdfs/restart_docker_container.sh b/python/testing/hdfs/restart_docker_container.sh new file mode 100644 index 00000000000..cd6b194de43 --- /dev/null +++ b/python/testing/hdfs/restart_docker_container.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http:#www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +export ARROW_TEST_NN_HOST=arrow-hdfs +export ARROW_TEST_IMPALA_HOST=$ARROW_TEST_NN_HOST +export ARROW_TEST_IMPALA_PORT=21050 +export ARROW_TEST_WEBHDFS_PORT=50070 +export ARROW_TEST_WEBHDFS_USER=ubuntu + +docker stop $ARROW_TEST_NN_HOST +docker rm $ARROW_TEST_NN_HOST + +docker run -d -it --name $ARROW_TEST_NN_HOST \ + -v $PWD:/io \ + --hostname $ARROW_TEST_NN_HOST \ + -p $ARROW_TEST_WEBHDFS_PORT -p $ARROW_TEST_IMPALA_PORT \ + arrow-hdfs-test + +while ! docker exec $ARROW_TEST_NN_HOST impala-shell -q 'SELECT VERSION()'; do + sleep 1 +done diff --git a/python/testing/hdfs/run_tests.sh b/python/testing/hdfs/run_tests.sh new file mode 100755 index 00000000000..09631ffd880 --- /dev/null +++ b/python/testing/hdfs/run_tests.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http:#www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +HERE=$(cd `dirname "${BASH_SOURCE[0]:-$0}"` && pwd) + +source $HERE/../set_env_common.sh +source $HERE/../setup_toolchain.sh +source $HERE/../functions.sh + +git clone https://github.com/apache/arrow.git $ARROW_CHECKOUT + +use_clang +build_arrow +build_parquet + +$ARROW_CPP_BUILD_DIR/debug/io-hdfs-test diff --git a/python/testing/set_env_common.sh b/python/testing/set_env_common.sh new file mode 100644 index 00000000000..a57ce72621b --- /dev/null +++ b/python/testing/set_env_common.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http:#www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +export MINICONDA=$HOME/miniconda +export CPP_TOOLCHAIN=$HOME/cpp-toolchain + +export PATH="$MINICONDA/bin:$PATH" +export CONDA_PKGS_DIRS=$HOME/.conda_packages + +export ARROW_CHECKOUT=$HOME/arrow +export BUILD_DIR=$ARROW_CHECKOUT + +export BUILD_OS_NAME=linux +export BUILD_TYPE=debug + +export ARROW_CPP_DIR=$BUILD_DIR/cpp +export ARROW_PYTHON_DIR=$BUILD_DIR/python +export ARROW_C_GLIB_DIR=$BUILD_DIR/c_glib +export ARROW_JAVA_DIR=${BUILD_DIR}/java +export ARROW_JS_DIR=${BUILD_DIR}/js +export ARROW_INTEGRATION_DIR=$BUILD_DIR/integration + +export CPP_BUILD_DIR=$BUILD_DIR/cpp-build + +export ARROW_CPP_INSTALL=$BUILD_DIR/cpp-install +export ARROW_CPP_BUILD_DIR=$BUILD_DIR/cpp-build +export ARROW_C_GLIB_INSTALL=$BUILD_DIR/c-glib-install + +export ARROW_BUILD_TOOLCHAIN=$CPP_TOOLCHAIN +export PARQUET_BUILD_TOOLCHAIN=$CPP_TOOLCHAIN + +export BOOST_ROOT=$CPP_TOOLCHAIN +export PATH=$CPP_TOOLCHAIN/bin:$PATH +export LD_LIBRARY_PATH=$CPP_TOOLCHAIN/lib:$LD_LIBRARY_PATH +export MAKE=ninja + +export VALGRIND="valgrind --tool=memcheck" + +export ARROW_HOME=$CPP_TOOLCHAIN +export PARQUET_HOME=$CPP_TOOLCHAIN + +# Arrow test variables + +export JAVA_HOME=/usr/lib/jvm/java-7-oracle +export HADOOP_HOME=/usr/lib/hadoop +export CLASSPATH=`$HADOOP_HOME/bin/hadoop classpath --glob` +export HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$HADOOP_HOME/lib/native" +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native/ + +export ARROW_HDFS_TEST_HOST=arrow-hdfs +export ARROW_HDFS_TEST_PORT=9000 +export ARROW_HDFS_TEST_USER=ubuntu +export ARROW_LIBHDFS_DIR=/usr/lib + +export LIBHDFS3_CONF=/io/hdfs/libhdfs3-hdfs-client.xml diff --git a/python/testing/setup_toolchain.sh b/python/testing/setup_toolchain.sh new file mode 100644 index 00000000000..d6d1231284e --- /dev/null +++ b/python/testing/setup_toolchain.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http:#www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +export PATH="$MINICONDA/bin:$PATH" +conda update -y -q conda +conda config --set auto_update_conda false +conda info -a + +conda config --set show_channel_urls True + +# Help with SSL timeouts to S3 +conda config --set remote_connect_timeout_secs 12 + +conda config --add channels https://repo.continuum.io/pkgs/free +conda config --add channels conda-forge +conda info -a + +# faster builds, please +conda install -y nomkl + +conda install --y conda-build jinja2 anaconda-client cmake curl + +# Set up C++ toolchain +conda create -y -q -p $CPP_TOOLCHAIN python=3.6 \ + jemalloc=4.4.0 \ + nomkl \ + boost-cpp \ + rapidjson \ + flatbuffers \ + gflags \ + lz4-c \ + snappy \ + zstd \ + brotli \ + zlib \ + git \ + cmake \ + curl \ + thrift-cpp \ + libhdfs3 \ + ninja + +if [ $BUILD_OS_NAME == "osx" ]; then + brew update > /dev/null + brew install jemalloc + brew install ccache +fi diff --git a/python/testing/test_hdfs.sh b/python/testing/test_hdfs.sh new file mode 100755 index 00000000000..7801361dd5c --- /dev/null +++ b/python/testing/test_hdfs.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http:#www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +docker build -t arrow-hdfs-test -f hdfs/Dockerfile . +bash hdfs/restart_docker_container.sh +docker exec -it arrow-hdfs /io/hdfs/run_tests.sh From 591e7c6bc4b1b6b386d4feeda166a1a6f7f916a8 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 26 Jul 2017 19:31:20 -0400 Subject: [PATCH 2/5] Add Python tests Change-Id: I40d3acd46802ecb2a37f4d83ed08a841645772ba --- python/testing/functions.sh | 29 +++++++++++++++++++ python/testing/hdfs/Dockerfile | 22 ++++++++++---- .../testing/hdfs/restart_docker_container.sh | 1 + python/testing/hdfs/run_tests.sh | 7 +++++ python/testing/set_env_common.sh | 1 - python/testing/test_hdfs.sh | 1 + 6 files changed, 54 insertions(+), 7 deletions(-) diff --git a/python/testing/functions.sh b/python/testing/functions.sh index 0583685182c..69f1b47bcdd 100644 --- a/python/testing/functions.sh +++ b/python/testing/functions.sh @@ -27,6 +27,34 @@ use_clang() { export CXX=clang++-4.0 } +bootstrap_python_env() { + PYTHON_VERSION=$1 + CONDA_ENV_DIR=$BUILD_DIR/pyarrow-test-$PYTHON_VERSION + + conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION cmake curl + source activate $CONDA_ENV_DIR + + python --version + which python + + # faster builds, please + conda install -y -q nomkl pip numpy pandas cython +} + +build_pyarrow() { + # Other stuff pip install + pushd $ARROW_PYTHON_DIR + pip install -r requirements.txt + python setup.py build_ext --with-parquet --with-plasma \ + install --single-version-externally-managed --record=record.text + popd + + python -c "import pyarrow.parquet" + python -c "import pyarrow.plasma" + + export PYARROW_PATH=$CONDA_PREFIX/lib/python$PYTHON_VERSION/site-packages/pyarrow +} + build_arrow() { mkdir -p $ARROW_CPP_BUILD_DIR pushd $ARROW_CPP_BUILD_DIR @@ -35,6 +63,7 @@ build_arrow() { -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ -DARROW_NO_DEPRECATED_API=ON \ + -DARROW_PYTHON=ON \ -DARROW_PLASMA=ON \ -DARROW_BOOST_USE_SHARED=off \ $ARROW_CPP_DIR diff --git a/python/testing/hdfs/Dockerfile b/python/testing/hdfs/Dockerfile index 01017069e18..0d4041a4408 100644 --- a/python/testing/hdfs/Dockerfile +++ b/python/testing/hdfs/Dockerfile @@ -18,11 +18,19 @@ # TODO Replace this with a complete clean image build FROM cpcloud86/impala:metastore -RUN sudo apt-add-repository -y ppa:ubuntu-toolchain-r/test && \ - sudo apt-get update && \ - sudo apt-get install -y \ +USER root + +RUN apt-add-repository -y ppa:ubuntu-toolchain-r/test && \ + apt-get update && \ + apt-get install -y \ gcc-4.9 \ g++-4.9 \ + build-essential \ + autotools-dev \ + autoconf \ + gtk-doc-tools \ + autoconf-archive \ + libgirepository1.0-dev \ libtool \ libjemalloc-dev \ ccache \ @@ -30,10 +38,12 @@ RUN sudo apt-add-repository -y ppa:ubuntu-toolchain-r/test && \ gdb RUN wget -O - http://llvm.org/apt/llvm-snapshot.gpg.key|sudo apt-key add - && \ - sudo apt-add-repository -y \ + apt-add-repository -y \ "deb http://llvm.org/apt/trusty/ llvm-toolchain-trusty-4.0 main" && \ - sudo apt-get update && \ - sudo apt-get install -y clang-4.0 clang-format-4.0 clang-tidy-4.0 + apt-get update && \ + apt-get install -y clang-4.0 clang-format-4.0 clang-tidy-4.0 + +USER ubuntu RUN wget -O /tmp/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash /tmp/miniconda.sh -b -p /home/ubuntu/miniconda && \ diff --git a/python/testing/hdfs/restart_docker_container.sh b/python/testing/hdfs/restart_docker_container.sh index cd6b194de43..a8b05a1bdf4 100644 --- a/python/testing/hdfs/restart_docker_container.sh +++ b/python/testing/hdfs/restart_docker_container.sh @@ -29,6 +29,7 @@ docker rm $ARROW_TEST_NN_HOST docker run -d -it --name $ARROW_TEST_NN_HOST \ -v $PWD:/io \ --hostname $ARROW_TEST_NN_HOST \ + --shm-size=2gb \ -p $ARROW_TEST_WEBHDFS_PORT -p $ARROW_TEST_IMPALA_PORT \ arrow-hdfs-test diff --git a/python/testing/hdfs/run_tests.sh b/python/testing/hdfs/run_tests.sh index 09631ffd880..49b8fd84a4c 100755 --- a/python/testing/hdfs/run_tests.sh +++ b/python/testing/hdfs/run_tests.sh @@ -28,7 +28,14 @@ source $HERE/../functions.sh git clone https://github.com/apache/arrow.git $ARROW_CHECKOUT use_clang + +bootstrap_python_env 3.6 + build_arrow build_parquet +build_pyarrow + $ARROW_CPP_BUILD_DIR/debug/io-hdfs-test + +python -m pytest -vv -r sxX -s $PYARROW_PATH --parquet --hdfs diff --git a/python/testing/set_env_common.sh b/python/testing/set_env_common.sh index a57ce72621b..87e9e626539 100644 --- a/python/testing/set_env_common.sh +++ b/python/testing/set_env_common.sh @@ -48,7 +48,6 @@ export PARQUET_BUILD_TOOLCHAIN=$CPP_TOOLCHAIN export BOOST_ROOT=$CPP_TOOLCHAIN export PATH=$CPP_TOOLCHAIN/bin:$PATH export LD_LIBRARY_PATH=$CPP_TOOLCHAIN/lib:$LD_LIBRARY_PATH -export MAKE=ninja export VALGRIND="valgrind --tool=memcheck" diff --git a/python/testing/test_hdfs.sh b/python/testing/test_hdfs.sh index 7801361dd5c..5df94795695 100755 --- a/python/testing/test_hdfs.sh +++ b/python/testing/test_hdfs.sh @@ -22,3 +22,4 @@ set -ex docker build -t arrow-hdfs-test -f hdfs/Dockerfile . bash hdfs/restart_docker_container.sh docker exec -it arrow-hdfs /io/hdfs/run_tests.sh +docker stop arrow-hdfs From d12eea48156a3917d54e4ea4918f866559ffb125 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 27 Jul 2017 11:18:09 -0400 Subject: [PATCH 3/5] Fix license headers Change-Id: I9819fb4f79ae202164dc4cf41c8d35961cff2589 --- python/testing/functions.sh | 2 +- python/testing/hdfs/Dockerfile | 2 +- python/testing/hdfs/libhdfs3-hdfs-client.xml | 2 +- python/testing/hdfs/restart_docker_container.sh | 2 +- python/testing/hdfs/run_tests.sh | 4 ++-- python/testing/set_env_common.sh | 4 ++-- python/testing/setup_toolchain.sh | 4 ++-- python/testing/test_hdfs.sh | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/python/testing/functions.sh b/python/testing/functions.sh index 69f1b47bcdd..6bc342bd794 100644 --- a/python/testing/functions.sh +++ b/python/testing/functions.sh @@ -8,7 +8,7 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http:#www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an diff --git a/python/testing/hdfs/Dockerfile b/python/testing/hdfs/Dockerfile index 0d4041a4408..97355137ff3 100644 --- a/python/testing/hdfs/Dockerfile +++ b/python/testing/hdfs/Dockerfile @@ -6,7 +6,7 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http:#www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an diff --git a/python/testing/hdfs/libhdfs3-hdfs-client.xml b/python/testing/hdfs/libhdfs3-hdfs-client.xml index fc651516c63..f929929b386 100644 --- a/python/testing/hdfs/libhdfs3-hdfs-client.xml +++ b/python/testing/hdfs/libhdfs3-hdfs-client.xml @@ -39,7 +39,7 @@ KDC --> dfs.ha.namenodes.phdcluster nn1,nn2 - + 6 dfs.namenode.rpc-address.phdcluster.nn1 diff --git a/python/testing/hdfs/restart_docker_container.sh b/python/testing/hdfs/restart_docker_container.sh index a8b05a1bdf4..15076cc2873 100644 --- a/python/testing/hdfs/restart_docker_container.sh +++ b/python/testing/hdfs/restart_docker_container.sh @@ -8,7 +8,7 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http:#www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an diff --git a/python/testing/hdfs/run_tests.sh b/python/testing/hdfs/run_tests.sh index 49b8fd84a4c..e0d36df58a3 100755 --- a/python/testing/hdfs/run_tests.sh +++ b/python/testing/hdfs/run_tests.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash - +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -8,7 +8,7 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http:#www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an diff --git a/python/testing/set_env_common.sh b/python/testing/set_env_common.sh index 87e9e626539..00251f92be4 100644 --- a/python/testing/set_env_common.sh +++ b/python/testing/set_env_common.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash - +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -8,7 +8,7 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http:#www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an diff --git a/python/testing/setup_toolchain.sh b/python/testing/setup_toolchain.sh index d6d1231284e..c3837b45cbc 100644 --- a/python/testing/setup_toolchain.sh +++ b/python/testing/setup_toolchain.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash - +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -8,7 +8,7 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http:#www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an diff --git a/python/testing/test_hdfs.sh b/python/testing/test_hdfs.sh index 5df94795695..e764752869d 100755 --- a/python/testing/test_hdfs.sh +++ b/python/testing/test_hdfs.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash - +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information From 4effee785c125a432c37a8d7daa96bd59725c439 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 27 Jul 2017 11:21:36 -0400 Subject: [PATCH 4/5] Fix license header Change-Id: I820f8eb707df50c6d12602fe2d816c80b1402ee1 --- python/testing/README.md | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/python/testing/README.md b/python/testing/README.md index 2498445fe92..07970a231b5 100644 --- a/python/testing/README.md +++ b/python/testing/README.md @@ -1,15 +1,20 @@ # Testing tools for odds and ends From a96e166564409cf3a934d64f8856b1c0eed5da60 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 27 Jul 2017 11:22:15 -0400 Subject: [PATCH 5/5] Fix header Change-Id: Ib247a679667a40365846507b6ea9795660226272 --- python/testing/test_hdfs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/testing/test_hdfs.sh b/python/testing/test_hdfs.sh index e764752869d..016e54a66a6 100755 --- a/python/testing/test_hdfs.sh +++ b/python/testing/test_hdfs.sh @@ -8,7 +8,7 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http:#www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an