diff --git a/ci/etc/valgrind-cran.supp b/ci/etc/valgrind-cran.supp new file mode 100644 index 000000000000..4d2922026082 --- /dev/null +++ b/ci/etc/valgrind-cran.supp @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{ + # `testthat::skip()`s cause a valgrind error that does not show up on CRAN. + + Memcheck:Cond + fun:gregexpr_Regexc + fun:do_regexpr + fun:bcEval + fun:Rf_eval + fun:R_execClosure + fun:Rf_applyClosure + fun:bcEval + fun:Rf_eval + fun:forcePromise + fun:FORCE_PROMISE + fun:getvar + fun:bcEval +} diff --git a/ci/scripts/r_valgrind.sh b/ci/scripts/r_valgrind.sh new file mode 100755 index 000000000000..43f8c26739af --- /dev/null +++ b/ci/scripts/r_valgrind.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +: ${R_BIN:=RDvalgrind} + +source_dir=${1}/r + +${R_BIN} CMD INSTALL ${source_dir} +pushd ${source_dir}/tests + +export TEST_R_WITH_ARROW=TRUE +# to generate suppression files run: +# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testtthat.supp +${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --suppressions=/${1}/ci/etc/valgrind-cran.supp" -f testthat.R |& tee testthat.out + +# valgrind --error-exitcode=1 should return an erroring exit code that we can catch, +# but R eats that and returns 0, so we need to look at the output and make sure that +# we have 0 errors instead. +if [ $(grep -c "ERROR SUMMARY: 0 errors" testthat.out) != 1 ]; then + cat testthat.out + echo "Found Valgrind errors" + exit 1 +fi + +# We might also considering using the greps that LibthGBM uses: +# https://github.com/microsoft/LightGBM/blob/fa6d356555f9ef888acf5f5e259dca958ca24f6d/.ci/test_r_package_valgrind.sh#L20-L85 + +popd \ No newline at end of file diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index f82960bf3537..832d16d9f69b 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -862,6 +862,14 @@ tasks: FEDORA: 33 run: fedora-python + test-r-linux-valgrind: + ci: azure + template: docker-tests/azure.linux.yml + params: + env: + UBUNTU: 18.04 + run: ubuntu-r-valgrind + test-r-linux-as-cran: ci: github template: r/github.linux.cran.yml diff --git a/docker-compose.yml b/docker-compose.yml index 4158ee3ff649..d9ca731dbd3f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -116,6 +116,7 @@ x-hierarchy: - ubuntu-cpp-sanitizer - ubuntu-cpp-thread-sanitizer - ubuntu-r-sanitizer + - ubuntu-r-valgrind - python-sdist - r # helper services @@ -1050,6 +1051,31 @@ services: /bin/bash -c " /arrow/ci/scripts/r_sanitize.sh /arrow" + ubuntu-r-valgrind: + # Only 18.04 and amd64 supported + # Usage: + # docker-compose build ubuntu-r-valgrind + # docker-compose run ubuntu-r-valgrind + image: ${REPO}:amd64-ubuntu-18.04-r-valgrind + build: + context: . + dockerfile: ci/docker/linux-r.dockerfile + cache_from: + - ${REPO}:amd64-ubuntu-18.04-r-valgrind + args: + base: wch1/r-debug:latest + r_bin: RDvalgrind + environment: + <<: *ccache + # AVX512 not supported by Valgrind (similar to ARROW-9851) some runners support AVX512 and some do not + # so some build might pass without this setting, but we want to ensure that we stay to AVX2 regardless of runner. + EXTRA_CMAKE_FLAGS: "-DARROW_RUNTIME_SIMD_LEVEL=AVX2" + volumes: *ubuntu-volumes + command: > + /bin/bash -c " + /arrow/ci/scripts/r_valgrind.sh /arrow" + + ################################# Go ######################################## debian-go: diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh index e9c82a10e400..6fba5be34bcf 100755 --- a/r/inst/build_arrow_static.sh +++ b/r/inst/build_arrow_static.sh @@ -53,8 +53,8 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DARROW_DATASET=${ARROW_DATASET:-ON} \ -DARROW_DEPENDENCY_SOURCE=BUNDLED \ -DARROW_FILESYSTEM=ON \ - -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \ - -DARROW_MIMALLOC=${ARROW_MIMALLOC:-$ARROW_DEFAULT_PARAM} \ + -DARROW_JEMALLOC=${ARROW_JEMALLOC:-$ARROW_DEFAULT_PARAM} \ + -DARROW_MIMALLOC=${ARROW_MIMALLOC:-ON} \ -DARROW_JSON=ON \ -DARROW_PARQUET=${ARROW_PARQUET:-ON} \ -DARROW_S3=${ARROW_S3:-$ARROW_DEFAULT_PARAM} \ diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R index 6fb97da000d8..b1c7d66bec84 100644 --- a/r/tests/testthat/helper-skip.R +++ b/r/tests/testthat/helper-skip.R @@ -22,6 +22,11 @@ build_features <- c( ) skip_if_not_available <- function(feature) { + if (feature == "re2") { + # RE2 does not support valgrind (on purpose): https://github.com/google/re2/issues/177 + skip_on_valgrind() + } + yes <- feature %in% names(build_features) && build_features[feature] if (!yes) { skip(paste("Arrow C++ not built with", feature)) @@ -29,6 +34,8 @@ skip_if_not_available <- function(feature) { } skip_if_no_pyarrow <- function() { + skip_on_valgrind() + skip_if_not_installed("reticulate") if (!reticulate::py_module_available("pyarrow")) { skip("pyarrow not available for testing") @@ -49,6 +56,18 @@ skip_if_not_running_large_memory_tests <- function() { ) } +skip_on_valgrind <- function() { + # This does not actually skip on valgrind because we can't exactly detect it. + # Instead, it skips on CRAN when the OS is linux + and the R version is development + # (which is where valgrind is run as of this code) + linux_dev <- identical(tolower(Sys.info()[["sysname"]]), "linux") && + grepl("devel", R.version.string) + + if (linux_dev) { + skip_on_cran() + } +} + process_is_running <- function(x) { cmd <- sprintf("ps aux | grep '%s' | grep -v grep", x) tryCatch(system(cmd, ignore.stdout = TRUE) == 0, error = function(e) FALSE) diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index e064f81cdfaf..26d0a3005e42 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -53,6 +53,7 @@ test_that("binary Array", { expect_array_roundtrip(bin, fixed_size_binary(byte_width = 10)) # degenerate cases + skip_on_valgrind() # valgrind errors on these tests ARROW-12638 bin <- vctrs::new_vctr( list(1:10), class = "arrow_binary" diff --git a/r/tests/testthat/test-arrow.R b/r/tests/testthat/test-arrow.R index 2c901e1c96a0..2ab127e48000 100644 --- a/r/tests/testthat/test-arrow.R +++ b/r/tests/testthat/test-arrow.R @@ -62,6 +62,10 @@ test_that("check for an ArrowObject in functions use std::shared_ptr", { }) test_that("MemoryPool calls gc() to free memory when allocation fails (ARROW-10080)", { + # There is a valgrind error on this test because there cannot be memory allocated + # which is exactly what this test is checking, but we quiet this + skip_on_valgrind() + env <- new.env() trace(gc, print = FALSE, tracer = function() { env$gc_was_called <- TRUE diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R index 0621b7779c73..51bfa05b9164 100644 --- a/r/tests/testthat/test-compute-aggregate.R +++ b/r/tests/testthat/test-compute-aggregate.R @@ -37,7 +37,11 @@ test_that("sum.Array", { floats <- c(floats, NA) na <- Array$create(floats) - expect_identical(as.numeric(sum(na)), sum(floats)) + if (!grepl("devel", R.version.string)) { + # Valgrind on R-devel confuses NaN and NA_real_ + # https://r.789695.n4.nabble.com/Difference-in-NA-behavior-in-R-devel-running-under-valgrind-td4768731.html + expect_identical(as.numeric(sum(na)), sum(floats)) + } expect_r6_class(sum(na, na.rm = TRUE), "Scalar") expect_identical(as.numeric(sum(na, na.rm = TRUE)), sum(floats, na.rm = TRUE)) @@ -78,7 +82,11 @@ test_that("mean.Array", { floats <- c(floats, NA) na <- Array$create(floats) - expect_identical(as.vector(mean(na)), mean(floats)) + if (!grepl("devel", R.version.string)) { + # Valgrind on R-devel confuses NaN and NA_real_ + # https://r.789695.n4.nabble.com/Difference-in-NA-behavior-in-R-devel-running-under-valgrind-td4768731.html + expect_identical(as.vector(mean(na)), mean(floats)) + } expect_r6_class(mean(na, na.rm = TRUE), "Scalar") expect_identical(as.vector(mean(na, na.rm = TRUE)), mean(floats, na.rm = TRUE))