diff --git a/.clang-format b/.clang-format index 06453dfbb25..9448dc8d8c8 100644 --- a/.clang-format +++ b/.clang-format @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. --- -BasedOnStyle: Google -DerivePointerAlignment: false +BasedOnStyle: Google ColumnLimit: 90 +DerivePointerAlignment: false +IncludeBlocks: Preserve diff --git a/.env b/.env index cf3e6331e2c..7dcfa306bf0 100644 --- a/.env +++ b/.env @@ -51,7 +51,7 @@ FEDORA=33 UBUNTU=20.04 # Default versions for various dependencies -CLANG_TOOLS=8 +CLANG_TOOLS=12 CUDA=9.1 DASK=latest DOTNET=3.1 diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index c50f71bc913..7bd99aa1fb0 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -29,20 +29,27 @@ RUN echo "debconf debconf/frontend select Noninteractive" | \ # while debugging package list with docker build. ARG clang_tools ARG llvm -RUN if [ "${llvm}" -gt "10" ]; then \ +RUN latest_system_llvm=10 && \ + if [ ${llvm} -gt ${latest_system_llvm} -o \ + ${clang_tools} -gt ${latest_system_llvm} ]; then \ apt-get update -y -q && \ apt-get install -y -q --no-install-recommends \ apt-transport-https \ ca-certificates \ gnupg \ + lsb-release \ wget && \ wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ - echo "deb https://apt.llvm.org/focal/ llvm-toolchain-focal-${llvm} main" > \ - /etc/apt/sources.list.d/llvm.list && \ - if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \ - echo "deb https://apt.llvm.org/focal/ llvm-toolchain-focal-${clang_tools} main" > \ + code_name=$(lsb_release --codename --short) && \ + if [ ${llvm} -gt 10 ]; then \ + echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list; \ + fi && \ + if [ ${clang_tools} -ne ${llvm} -a \ + ${clang_tools} -gt ${latest_system_llvm} ]; then \ + echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${clang_tools} main" > \ /etc/apt/sources.list.d/clang-tools.list; \ - fi \ + fi; \ fi && \ apt-get update -y -q && \ apt-get install -y -q --no-install-recommends \ diff --git a/ci/docker/ubuntu-21.04-cpp.dockerfile b/ci/docker/ubuntu-21.04-cpp.dockerfile index 2017d26b5bd..a6a88040540 100644 --- a/ci/docker/ubuntu-21.04-cpp.dockerfile +++ b/ci/docker/ubuntu-21.04-cpp.dockerfile @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -ARG base=amd64/ubuntu:20.04 +ARG base=amd64/ubuntu:21.04 FROM ${base} SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -29,20 +29,27 @@ RUN echo "debconf debconf/frontend select Noninteractive" | \ # while debugging package list with docker build. ARG clang_tools ARG llvm -RUN if [ "${llvm}" -gt "10" ]; then \ +RUN latest_system_llvm=12 && \ + if [ ${llvm} -gt ${latest_system_llvm} -o \ + ${clang_tools} -gt ${latest_system_llvm} ]; then \ apt-get update -y -q && \ apt-get install -y -q --no-install-recommends \ apt-transport-https \ ca-certificates \ gnupg \ + lsb-release \ wget && \ wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ - echo "deb https://apt.llvm.org/hirsute/ llvm-toolchain-hirsute-${llvm} main" > \ - /etc/apt/sources.list.d/llvm.list && \ - if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \ - echo "deb https://apt.llvm.org/hirsute/ llvm-toolchain-hirsute-${clang_tools} main" > \ + code_name=$(lsb_release --codename --short) && \ + if [ ${llvm} -gt 10 ]; then \ + echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list; \ + fi && \ + if [ ${clang_tools} -ne ${llvm} -a \ + ${clang_tools} -gt ${latest_system_llvm} ]; then \ + echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${clang_tools} main" > \ /etc/apt/sources.list.d/clang-tools.list; \ - fi \ + fi; \ fi && \ apt-get update -y -q && \ apt-get install -y -q --no-install-recommends \ diff --git a/cpp/src/arrow/compute/exec/expression_test.cc b/cpp/src/arrow/compute/exec/expression_test.cc index 94ca2074835..fa05ddf6422 100644 --- a/cpp/src/arrow/compute/exec/expression_test.cc +++ b/cpp/src/arrow/compute/exec/expression_test.cc @@ -1222,8 +1222,9 @@ TEST(Expression, SingleComparisonGuarantees) { all = false; } } - Simplify{filter}.WithGuarantee(guarantee).Expect( - all ? literal(true) : none ? literal(false) : filter); + Simplify{filter}.WithGuarantee(guarantee).Expect(all ? literal(true) + : none ? literal(false) + : filter); } } } diff --git a/cpp/src/arrow/compute/exec/hash_join_node_test.cc b/cpp/src/arrow/compute/exec/hash_join_node_test.cc index 13e2e65d675..b8a18893da8 100644 --- a/cpp/src/arrow/compute/exec/hash_join_node_test.cc +++ b/cpp/src/arrow/compute/exec/hash_join_node_test.cc @@ -281,8 +281,10 @@ struct RandomDataTypeConstraints { void OnlyInt(int int_size, bool allow_nulls) { Default(); - data_type_enabled_mask = - int_size == 8 ? kInt8 : int_size == 4 ? kInt4 : int_size == 2 ? kInt2 : kInt1; + data_type_enabled_mask = int_size == 8 ? kInt8 + : int_size == 4 ? kInt4 + : int_size == 2 ? kInt2 + : kInt1; if (!allow_nulls) { max_null_probability = 0.0; } @@ -1166,12 +1168,12 @@ void TestHashJoinDictionaryHelper( int expected_num_r_no_match, // Whether to swap two inputs to the hash join bool swap_sides) { - int64_t l_length = l_key.is_array() - ? l_key.array()->length - : l_payload.is_array() ? l_payload.array()->length : -1; - int64_t r_length = r_key.is_array() - ? r_key.array()->length - : r_payload.is_array() ? r_payload.array()->length : -1; + int64_t l_length = l_key.is_array() ? l_key.array()->length + : l_payload.is_array() ? l_payload.array()->length + : -1; + int64_t r_length = r_key.is_array() ? r_key.array()->length + : r_payload.is_array() ? r_payload.array()->length + : -1; ARROW_DCHECK(l_length >= 0 && r_length >= 0); constexpr int batch_multiplicity_for_parallel = 2; diff --git a/cpp/src/arrow/compute/exec/key_encode.cc b/cpp/src/arrow/compute/exec/key_encode.cc index e113af5fc5c..d5e208bfb74 100644 --- a/cpp/src/arrow/compute/exec/key_encode.cc +++ b/cpp/src/arrow/compute/exec/key_encode.cc @@ -572,10 +572,14 @@ void KeyEncoder::EncoderBinaryPair::Decode(uint32_t start_row, uint32_t num_rows uint32_t col_width1 = col_prep[0].metadata().fixed_length; uint32_t col_width2 = col_prep[1].metadata().fixed_length; - int log_col_width1 = - col_width1 == 8 ? 3 : col_width1 == 4 ? 2 : col_width1 == 2 ? 1 : 0; - int log_col_width2 = - col_width2 == 8 ? 3 : col_width2 == 4 ? 2 : col_width2 == 2 ? 1 : 0; + int log_col_width1 = col_width1 == 8 ? 3 + : col_width1 == 4 ? 2 + : col_width1 == 2 ? 1 + : 0; + int log_col_width2 = col_width2 == 8 ? 3 + : col_width2 == 4 ? 2 + : col_width2 == 2 ? 1 + : 0; bool is_row_fixed_length = rows.metadata().is_fixed_length; diff --git a/cpp/src/arrow/compute/exec/key_map.h b/cpp/src/arrow/compute/exec/key_map.h index cf539f4a99b..12c1e393c4a 100644 --- a/cpp/src/arrow/compute/exec/key_map.h +++ b/cpp/src/arrow/compute/exec/key_map.h @@ -153,8 +153,10 @@ class SwissTable { static int num_groupid_bits_from_log_blocks(int log_blocks) { int required_bits = log_blocks + 3; - return required_bits <= 8 ? 8 - : required_bits <= 16 ? 16 : required_bits <= 32 ? 32 : 64; + return required_bits <= 8 ? 8 + : required_bits <= 16 ? 16 + : required_bits <= 32 ? 32 + : 64; } // Use 32-bit hash for now diff --git a/cpp/src/arrow/io/memory_benchmark.cc b/cpp/src/arrow/io/memory_benchmark.cc index fbb34f38654..6af1807d161 100644 --- a/cpp/src/arrow/io/memory_benchmark.cc +++ b/cpp/src/arrow/io/memory_benchmark.cc @@ -49,10 +49,10 @@ using VectorType = __m512i; #define VectorSet _mm512_set1_epi32 #define VectorLoad _mm512_stream_load_si512 #define VectorLoadAsm(SRC, DST) \ - asm volatile("vmovaps %[src], %[dst]" : [ dst ] "=v"(DST) : [ src ] "m"(SRC) :) + asm volatile("vmovaps %[src], %[dst]" : [dst] "=v"(DST) : [src] "m"(SRC) :) #define VectorStreamLoad _mm512_stream_load_si512 #define VectorStreamLoadAsm(SRC, DST) \ - asm volatile("vmovntdqa %[src], %[dst]" : [ dst ] "=v"(DST) : [ src ] "m"(SRC) :) + asm volatile("vmovntdqa %[src], %[dst]" : [dst] "=v"(DST) : [src] "m"(SRC) :) #define VectorStreamWrite _mm512_stream_si512 #else @@ -63,10 +63,10 @@ using VectorType = __m256i; #define VectorSet _mm256_set1_epi32 #define VectorLoad _mm256_stream_load_si256 #define VectorLoadAsm(SRC, DST) \ - asm volatile("vmovaps %[src], %[dst]" : [ dst ] "=v"(DST) : [ src ] "m"(SRC) :) + asm volatile("vmovaps %[src], %[dst]" : [dst] "=v"(DST) : [src] "m"(SRC) :) #define VectorStreamLoad _mm256_stream_load_si256 #define VectorStreamLoadAsm(SRC, DST) \ - asm volatile("vmovntdqa %[src], %[dst]" : [ dst ] "=v"(DST) : [ src ] "m"(SRC) :) + asm volatile("vmovntdqa %[src], %[dst]" : [dst] "=v"(DST) : [src] "m"(SRC) :) #define VectorStreamWrite _mm256_stream_si256 #else // ARROW_HAVE_AVX2 not set @@ -75,10 +75,10 @@ using VectorType = __m128i; #define VectorSet _mm_set1_epi32 #define VectorLoad _mm_stream_load_si128 #define VectorLoadAsm(SRC, DST) \ - asm volatile("movaps %[src], %[dst]" : [ dst ] "=x"(DST) : [ src ] "m"(SRC) :) + asm volatile("movaps %[src], %[dst]" : [dst] "=x"(DST) : [src] "m"(SRC) :) #define VectorStreamLoad _mm_stream_load_si128 #define VectorStreamLoadAsm(SRC, DST) \ - asm volatile("movntdqa %[src], %[dst]" : [ dst ] "=x"(DST) : [ src ] "m"(SRC) :) + asm volatile("movntdqa %[src], %[dst]" : [dst] "=x"(DST) : [src] "m"(SRC) :) #define VectorStreamWrite _mm_stream_si128 #endif // ARROW_HAVE_AVX2 @@ -164,14 +164,14 @@ using VectorTypeDual = uint8x16x2_t; static void armv8_stream_load_pair(VectorType* src, VectorType* dst) { asm volatile("LDNP %[reg1], %[reg2], [%[from]]\n\t" - : [ reg1 ] "+r"(*dst), [ reg2 ] "+r"(*(dst + 1)) - : [ from ] "r"(src)); + : [reg1] "+r"(*dst), [reg2] "+r"(*(dst + 1)) + : [from] "r"(src)); } static void armv8_stream_store_pair(VectorType* src, VectorType* dst) { asm volatile("STNP %[reg1], %[reg2], [%[to]]\n\t" - : [ to ] "+r"(dst) - : [ reg1 ] "r"(*src), [ reg2 ] "r"(*(src + 1)) + : [to] "+r"(dst) + : [reg1] "r"(*src), [reg2] "r"(*(src + 1)) : "memory"); } @@ -179,7 +179,7 @@ static void armv8_stream_ldst_pair(VectorType* src, VectorType* dst) { asm volatile( "LDNP q1, q2, [%[from]]\n\t" "STNP q1, q2, [%[to]]\n\t" - : [ from ] "+r"(src), [ to ] "+r"(dst) + : [from] "+r"(src), [to] "+r"(dst) : : "memory", "v0", "v1", "v2", "v3"); } diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index 41297804884..19917185130 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -38,8 +38,8 @@ #endif #include "arrow/table.h" -#include "arrow/type.h" #include "arrow/testing/random.h" +#include "arrow/type.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" #include "arrow/util/pcg_random.h" diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc index 5f48613361d..0fbed2c47da 100644 --- a/cpp/src/arrow/util/basic_decimal.cc +++ b/cpp/src/arrow/util/basic_decimal.cc @@ -1353,9 +1353,9 @@ bool operator<(const BasicDecimal256& left, const BasicDecimal256& right) { const auto rhs_le = bit_util::little_endian::Make(right.native_endian_array()); return lhs_le[3] != rhs_le[3] ? static_cast(lhs_le[3]) < static_cast(rhs_le[3]) - : lhs_le[2] != rhs_le[2] ? lhs_le[2] < rhs_le[2] - : lhs_le[1] != rhs_le[1] ? lhs_le[1] < rhs_le[1] - : lhs_le[0] < rhs_le[0]; + : lhs_le[2] != rhs_le[2] ? lhs_le[2] < rhs_le[2] + : lhs_le[1] != rhs_le[1] ? lhs_le[1] < rhs_le[1] + : lhs_le[0] < rhs_le[0]; } BasicDecimal256 operator-(const BasicDecimal256& operand) { diff --git a/cpp/src/gandiva/expression_registry.cc b/cpp/src/gandiva/expression_registry.cc index c3a08fd3a4f..9bff97f5ad2 100644 --- a/cpp/src/gandiva/expression_registry.cc +++ b/cpp/src/gandiva/expression_registry.cc @@ -60,8 +60,8 @@ FunctionSignature ExpressionRegistry::FunctionSignatureIterator::operator*() { return *func_sig_it_; } -ExpressionRegistry::func_sig_iterator_type ExpressionRegistry::FunctionSignatureIterator:: -operator++(int increment) { +ExpressionRegistry::func_sig_iterator_type +ExpressionRegistry::FunctionSignatureIterator::operator++(int increment) { ++func_sig_it_; // point func_sig_it_ to first signature of next nativefunction if func_sig_it_ is // pointing to end diff --git a/cpp/src/parquet/arrow/reconstruct_internal_test.cc b/cpp/src/parquet/arrow/reconstruct_internal_test.cc index 495b69f9eab..9254e774b3e 100644 --- a/cpp/src/parquet/arrow/reconstruct_internal_test.cc +++ b/cpp/src/parquet/arrow/reconstruct_internal_test.cc @@ -113,9 +113,10 @@ class FileBuilder { auto typed_writer = checked_cast>*>(column_writer); - const int64_t num_values = static_cast( - (max_def_level > 0) ? def_levels.size() - : (max_rep_level > 0) ? rep_levels.size() : values.size()); + const int64_t num_values = + static_cast((max_def_level > 0) ? def_levels.size() + : (max_rep_level > 0) ? rep_levels.size() + : values.size()); const int64_t values_written = typed_writer->WriteBatch( num_values, LevelPointerOrNull(def_levels, max_def_level), LevelPointerOrNull(rep_levels, max_rep_level), values.data()); diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 5e7b84bd417..33ccbc4de44 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1186,7 +1186,6 @@ tasks: params: env: UBUNTU: 21.04 - CLANG_TOOLS: 9 # can remove this when >=9 is the default R_PRUNE_DEPS: TRUE flags: '-e ARROW_SOURCE_HOME="/arrow" -e FORCE_BUNDLED_BUILD=TRUE -e LIBARROW_BUILD=TRUE' image: ubuntu-r-only-r @@ -1198,7 +1197,6 @@ tasks: params: env: UBUNTU: 21.04 - CLANG_TOOLS: 9 # can remove this when >=9 is the default GCC_VERSION: 11 # S3 support is not buildable with gcc11 right now flags: '-e ARROW_SOURCE_HOME="/arrow" -e FORCE_BUNDLED_BUILD=TRUE -e LIBARROW_BUILD=TRUE -e ARROW_S3=OFF' diff --git a/docs/source/developers/cpp/building.rst b/docs/source/developers/cpp/building.rst index 7245cdb814b..0258412b554 100644 --- a/docs/source/developers/cpp/building.rst +++ b/docs/source/developers/cpp/building.rst @@ -398,7 +398,7 @@ CMake: LLVM and Clang Tools ~~~~~~~~~~~~~~~~~~~~ -We are currently using LLVM 8 for library builds and for other developer tools +We are currently using LLVM for library builds and for other developer tools such as code formatting with ``clang-format``. LLVM can be installed via most modern package managers (apt, yum, conda, Homebrew, vcpkg, chocolatey). diff --git a/docs/source/developers/cpp/development.rst b/docs/source/developers/cpp/development.rst index 4098f1c4e67..bd9e47f14ed 100644 --- a/docs/source/developers/cpp/development.rst +++ b/docs/source/developers/cpp/development.rst @@ -105,9 +105,11 @@ following checks: subcommand to :ref:`Archery `. This can also be fixed locally by running ``archery lint --cpplint --fix``. -In order to account for variations in the behavior of ``clang-format`` between -major versions of LLVM, we pin the version of ``clang-format`` used (current -LLVM 8). +In order to account for variations in the behavior of ``clang-format`` +between major versions of LLVM, we pin the version of ``clang-format`` +used. You can confirm the current pinned version by finding +the ``CLANG_TOOLS`` variable value in `.env +`_. Depending on how you installed clang-format, the build system may not be able to find it. You can provide an explicit path to your LLVM installation (or the diff --git a/r/lint.sh b/r/lint.sh index 629879e0489..91435e7e01a 100755 --- a/r/lint.sh +++ b/r/lint.sh @@ -26,7 +26,16 @@ SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" CPP_BUILD_SUPPORT=$SOURCE_DIR/../cpp/build-support # Run clang-format -: ${CLANG_FORMAT:=$(. "${SOURCE_DIR}/../.env" && echo clang-format-${CLANG_TOOLS})} +if [ -z "${CLANG_FORMAT:-}" ]; then + CLANG_TOOLS=$(. "${SOURCE_DIR}/../.env" && echo ${CLANG_TOOLS}) + if type clang-format-${CLANG_TOOLS} >/dev/null 2>&1; then + CLANG_FORMAT=clang-format-${CLANG_TOOLS} + elif type brew >/dev/null 2>&1; then + CLANG_FORMAT=$(brew --prefix llvm@${CLANG_TOOLS})/bin/clang-format + else + CLANG_FORMAT=clang-format + fi +fi $CPP_BUILD_SUPPORT/run_clang_format.py \ --clang_format_binary=$CLANG_FORMAT \ --exclude_glob=$CPP_BUILD_SUPPORT/lint_exclusions.txt \ diff --git a/r/src/.clang-format b/r/src/.clang-format index 06453dfbb25..9448dc8d8c8 100644 --- a/r/src/.clang-format +++ b/r/src/.clang-format @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. --- -BasedOnStyle: Google -DerivePointerAlignment: false +BasedOnStyle: Google ColumnLimit: 90 +DerivePointerAlignment: false +IncludeBlocks: Preserve diff --git a/r/src/nameof.h b/r/src/nameof.h index ae49880b827..912f0d0f298 100644 --- a/r/src/nameof.h +++ b/r/src/nameof.h @@ -55,9 +55,9 @@ const size_t typename_prefix = search(raw(), "double"); template size_t struct_class_prefix() { #ifdef _MSC_VER - return starts_with(raw() + typename_prefix, "struct ") - ? 7 - : starts_with(raw() + typename_prefix, "class ") ? 6 : 0; + return starts_with(raw() + typename_prefix, "struct ") ? 7 + : starts_with(raw() + typename_prefix, "class ") ? 6 + : 0; #else return 0; #endif diff --git a/r/vignettes/developers/workflow.Rmd b/r/vignettes/developers/workflow.Rmd index e316dd8fe56..2a1ee23ee94 100644 --- a/r/vignettes/developers/workflow.Rmd +++ b/r/vignettes/developers/workflow.Rmd @@ -87,16 +87,18 @@ Fix any style issues before committing with ./lint.sh --fix ``` -The lint script requires Python 3 and `clang-format-8`. If the command +The lint script requires Python 3 and `clang-format`. If the command isn't found, you can explicitly provide the path to it like: ```bash -CLANG_FORMAT=$(which clang-format-8) ./lint.sh +CLANG_FORMAT=/opt/llvm/bin/clang-format ./lint.sh ``` -On macOS, you can get this by installing LLVM via Homebrew and running the script as: +You can see what version of `clang-format` is required by the following +command: + ```bash -CLANG_FORMAT=$(brew --prefix llvm@8)/bin/clang-format ./lint.sh +(. ../.env && echo ${CLANG_TOOLS}) ``` _Note_ that the lint script requires Python 3 and the Python dependencies