diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index d47a6696e8f..46845d0e623 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -41,6 +41,11 @@ if [ "${ARROW_USE_CCACHE}" == "ON" ]; then ccache -s fi +if [ "${ARROW_USE_TSAN}" == "ON" ] && [ ! -x "${ASAN_SYMBOLIZER_PATH}" ]; then + echo -e "Invalid value for \$ASAN_SYMBOLIZER_PATH: ${ASAN_SYMBOLIZER_PATH}" + exit 1 +fi + mkdir -p ${build_dir} pushd ${build_dir} diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 354461cbd27..f12f071642b 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -260,7 +260,6 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdocumentation") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-missing-braces") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter") - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-constant-logical-operand") elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") @@ -342,7 +341,11 @@ if(MSVC) # Disable "switch statement contains 'default' but no 'case' labels" warning # (required for protobuf, see https://github.com/protocolbuffers/protobuf/issues/6885) set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd4065") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + # Avoid error when an unknown warning flag is passed + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option") + if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL "7.0" OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "7.0") # Without this, gcc >= 7 warns related to changes in C++17 @@ -383,7 +386,7 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STRE set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Qunused-arguments") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments") - # Avoid clang error when an unknown warning flag is passed + # Avoid error when an unknown warning flag is passed set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option") # Add colors when paired with ninja set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics") diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/exec/expression.cc index 022584d5b39..bc9a9103f6d 100644 --- a/cpp/src/arrow/compute/exec/expression.cc +++ b/cpp/src/arrow/compute/exec/expression.cc @@ -28,7 +28,6 @@ #include "arrow/io/memory.h" #include "arrow/ipc/reader.h" #include "arrow/ipc/writer.h" -#include "arrow/util/atomic_shared_ptr.h" #include "arrow/util/hash_util.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" @@ -43,11 +42,15 @@ using internal::checked_pointer_cast; namespace compute { -Expression::Expression(Call call) { - call.hash = std::hash{}(call.function_name); - for (const auto& arg : call.arguments) { - arrow::internal::hash_combine(call.hash, arg.hash()); +void Expression::Call::ComputeHash() { + hash = std::hash{}(function_name); + for (const auto& arg : arguments) { + arrow::internal::hash_combine(hash, arg.hash()); } +} + +Expression::Expression(Call call) { + call.ComputeHash(); impl_ = std::make_shared(std::move(call)); } diff --git a/cpp/src/arrow/compute/exec/expression.h b/cpp/src/arrow/compute/exec/expression.h index d06a923bb32..3810accf70a 100644 --- a/cpp/src/arrow/compute/exec/expression.h +++ b/cpp/src/arrow/compute/exec/expression.h @@ -43,6 +43,7 @@ class ARROW_EXPORT Expression { std::string function_name; std::vector arguments; std::shared_ptr options; + // Cached hash value size_t hash; // post-Bind properties: @@ -50,6 +51,8 @@ class ARROW_EXPORT Expression { const Kernel* kernel = NULLPTR; std::shared_ptr kernel_state; ValueDescr descr; + + void ComputeHash(); }; std::string ToString() const; diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h index 1ac10ad7ce8..8c130c66193 100644 --- a/cpp/src/arrow/util/async_generator.h +++ b/cpp/src/arrow/util/async_generator.h @@ -17,7 +17,9 @@ #pragma once +#include #include +#include #include #include #include @@ -1253,7 +1255,9 @@ class BackgroundGenerator { it(std::move(it)), reading(false), finished(false), - should_shutdown(false) {} + should_shutdown(false) { + SetWorkerThreadId({}); // default-initialized thread id + } void ClearQueue() { while (!queue.empty()) { @@ -1312,11 +1316,28 @@ class BackgroundGenerator { return next; } + void SetWorkerThreadId(const std::thread::id tid) { + uint64_t equiv{0}; + // std::thread::id is trivially copyable as per C++ spec, + // so type punning as a uint64_t should work + static_assert(sizeof(std::thread::id) <= sizeof(uint64_t), + "std::thread::id can't fit into uint64_t"); + memcpy(&equiv, reinterpret_cast(&tid), sizeof(tid)); + worker_thread_id.store(equiv); + } + + std::thread::id GetWorkerThreadId() { + const auto equiv = worker_thread_id.load(); + std::thread::id tid; + memcpy(reinterpret_cast(&tid), &equiv, sizeof(tid)); + return tid; + } + internal::Executor* io_executor; const int max_q; const int q_restart; Iterator it; - std::thread::id worker_thread_id; + std::atomic worker_thread_id; // If true, the task is actively pumping items from the queue and does not need a // restart @@ -1344,7 +1365,7 @@ class BackgroundGenerator { /// /// It's a deadlock if we enter cleanup from /// the worker thread but it can happen if the consumer doesn't transfer away - assert(state->worker_thread_id != std::this_thread::get_id()); + assert(state->GetWorkerThreadId() != std::this_thread::get_id()); Future<> finish_fut; { auto lock = state->mutex.Lock(); @@ -1365,7 +1386,7 @@ class BackgroundGenerator { static void WorkerTask(std::shared_ptr state) { // We need to capture the state to read while outside the mutex bool reading = true; - state->worker_thread_id = std::this_thread::get_id(); + state->SetWorkerThreadId(std::this_thread::get_id()); while (reading) { auto next = state->it.Next(); // Need to capture state->waiting_future inside the mutex to mark finished outside @@ -1417,7 +1438,7 @@ class BackgroundGenerator { // reference it. We can safely transition to idle now. task_finished = state->task_finished; state->task_finished = Future<>(); - state->worker_thread_id = std::thread::id(); + state->SetWorkerThreadId({}); // default-initialized thread id } task_finished.MarkFinished(); } diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc index 29c8d73ab6c..87c1737228e 100644 --- a/cpp/src/arrow/util/async_generator_test.cc +++ b/cpp/src/arrow/util/async_generator_test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include @@ -68,14 +69,14 @@ class TrackingGenerator { return state_->source(); } - int num_read() { return state_->num_read; } + int num_read() { return state_->num_read.load(); } private: struct State { explicit State(AsyncGenerator source) : source(std::move(source)), num_read(0) {} AsyncGenerator source; - int num_read; + std::atomic num_read; }; std::shared_ptr state_; diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 34307bb2583..2df24e3aada 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -824,7 +824,10 @@ tasks: template: docker-tests/github.linux.yml params: env: + # clang-tools and llvm version need to be synchronized so as + # to have the right llvm-symbolizer version CLANG_TOOLS: 11 + LLVM: 11 UBUNTU: 20.04 image: ubuntu-cpp-thread-sanitizer diff --git a/docker-compose.yml b/docker-compose.yml index 79618a1cfed..c872ad42af6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -365,6 +365,7 @@ services: <<: *ccache CC: clang-${CLANG_TOOLS} CXX: clang++-${CLANG_TOOLS} + ARROW_BUILD_STATIC: "OFF" ARROW_ENABLE_TIMING_TESTS: # inherit ARROW_FUZZING: "ON" # Check fuzz regressions ARROW_JEMALLOC: "OFF" @@ -399,6 +400,7 @@ services: <<: *ccache CC: clang-${CLANG_TOOLS} CXX: clang++-${CLANG_TOOLS} + ARROW_BUILD_STATIC: "OFF" ARROW_ENABLE_TIMING_TESTS: # inherit ARROW_DATASET: "ON" ARROW_JEMALLOC: "OFF"