From a7f085761677b6dfa0d780b303ad8aeb6f22644e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 4 Apr 2025 09:16:26 +0200 Subject: [PATCH 1/4] GH-46025: [C++] Use ARROW_CUDA_EXPORT instead of ARROW_EXPORT for libarrow_cuda --- .env | 2 +- cpp/src/arrow/gpu/CMakeLists.txt | 2 +- cpp/src/arrow/gpu/cuda_arrow_ipc.h | 6 +++--- cpp/src/arrow/gpu/cuda_context.h | 22 +++++++++++----------- cpp/src/arrow/gpu/cuda_internal.h | 3 ++- cpp/src/arrow/gpu/cuda_memory.h | 17 +++++++++-------- 6 files changed, 27 insertions(+), 25 deletions(-) diff --git a/.env b/.env index bdb74d89e1c..d2badc1e342 100644 --- a/.env +++ b/.env @@ -55,7 +55,7 @@ UBUNTU=22.04 # Default versions for various dependencies CLANG_TOOLS=14 CMAKE=3.25.0 -CUDA=11.2.2 +CUDA=11.7.1 DASK=latest DOTNET=8.0 GCC= diff --git a/cpp/src/arrow/gpu/CMakeLists.txt b/cpp/src/arrow/gpu/CMakeLists.txt index 7f8650c7dc8..f9ebb14be75 100644 --- a/cpp/src/arrow/gpu/CMakeLists.txt +++ b/cpp/src/arrow/gpu/CMakeLists.txt @@ -79,7 +79,7 @@ add_arrow_lib(arrow_cuda add_dependencies(arrow_cuda ${ARROW_CUDA_LIBRARIES}) foreach(LIB_TARGET ${ARROW_CUDA_LIBRARIES}) - target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_EXPORTING) + target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_CUDA_EXPORTING) endforeach() # CUDA build version diff --git a/cpp/src/arrow/gpu/cuda_arrow_ipc.h b/cpp/src/arrow/gpu/cuda_arrow_ipc.h index b7200a94b93..1cdacdf343d 100644 --- a/cpp/src/arrow/gpu/cuda_arrow_ipc.h +++ b/cpp/src/arrow/gpu/cuda_arrow_ipc.h @@ -22,9 +22,9 @@ #include "arrow/buffer.h" #include "arrow/type_fwd.h" -#include "arrow/util/visibility.h" #include "arrow/gpu/cuda_memory.h" +#include "arrow/gpu/visibility.h" namespace arrow { @@ -49,7 +49,7 @@ namespace cuda { /// \param[in] batch record batch to write /// \param[in] ctx CudaContext to allocate device memory from /// \return CudaBuffer or Status -ARROW_EXPORT +ARROW_CUDA_EXPORT Result> SerializeRecordBatch(const RecordBatch& batch, CudaContext* ctx); @@ -61,7 +61,7 @@ Result> SerializeRecordBatch(const RecordBatch& batc /// \param[in] buffer a CudaBuffer containing the complete IPC message /// \param[in] pool a MemoryPool to use for allocating space for the metadata /// \return RecordBatch or Status -ARROW_EXPORT +ARROW_CUDA_EXPORT Result> ReadRecordBatch( const std::shared_ptr& schema, const ipc::DictionaryMemo* dictionary_memo, const std::shared_ptr& buffer, MemoryPool* pool = default_memory_pool()); diff --git a/cpp/src/arrow/gpu/cuda_context.h b/cpp/src/arrow/gpu/cuda_context.h index 56c4f320352..35657590c67 100644 --- a/cpp/src/arrow/gpu/cuda_context.h +++ b/cpp/src/arrow/gpu/cuda_context.h @@ -24,8 +24,8 @@ #include #include "arrow/device.h" +#include "arrow/gpu/visibility.h" #include "arrow/result.h" -#include "arrow/util/visibility.h" namespace arrow { namespace cuda { @@ -41,7 +41,7 @@ class CudaMemoryManager; // XXX Should CudaContext be merged into CudaMemoryManager? -class ARROW_EXPORT CudaDeviceManager { +class ARROW_CUDA_EXPORT CudaDeviceManager { public: static Result Instance(); @@ -88,7 +88,7 @@ class ARROW_EXPORT CudaDeviceManager { /// /// Each CudaDevice instance is tied to a particular CUDA device /// (identified by its logical device number). -class ARROW_EXPORT CudaDevice : public Device { +class ARROW_CUDA_EXPORT CudaDevice : public Device { public: const char* type_name() const override; std::string ToString() const override; @@ -148,7 +148,7 @@ class ARROW_EXPORT CudaDevice : public Device { /// and freed using cuStreamCreate and cuStreamDestroy (or equivalent). /// Default construction will use the cuda default stream, and does not allow /// construction from literal 0 or nullptr. - class ARROW_EXPORT Stream : public Device::Stream { + class ARROW_CUDA_EXPORT Stream : public Device::Stream { public: ~Stream() = default; @@ -195,7 +195,7 @@ class ARROW_EXPORT CudaDevice : public Device { Result> WrapStream( void* device_stream, Stream::release_fn_t release_fn) override; - class ARROW_EXPORT SyncEvent : public Device::SyncEvent { + class ARROW_CUDA_EXPORT SyncEvent : public Device::SyncEvent { public: [[nodiscard]] CUevent value() const { if (sync_event_) { @@ -240,17 +240,17 @@ class ARROW_EXPORT CudaDevice : public Device { }; /// \brief Return whether a device instance is a CudaDevice -ARROW_EXPORT +ARROW_CUDA_EXPORT bool IsCudaDevice(const Device& device); /// \brief Cast a device instance to a CudaDevice /// /// An error is returned if the device is not a CudaDevice. -ARROW_EXPORT +ARROW_CUDA_EXPORT Result> AsCudaDevice(const std::shared_ptr& device); /// \brief MemoryManager implementation for CUDA -class ARROW_EXPORT CudaMemoryManager : public MemoryManager { +class ARROW_CUDA_EXPORT CudaMemoryManager : public MemoryManager { public: Result> GetBufferReader( std::shared_ptr buf) override; @@ -304,19 +304,19 @@ class ARROW_EXPORT CudaMemoryManager : public MemoryManager { }; /// \brief Return whether a MemoryManager instance is a CudaMemoryManager -ARROW_EXPORT +ARROW_CUDA_EXPORT bool IsCudaMemoryManager(const MemoryManager& mm); /// \brief Cast a MemoryManager instance to a CudaMemoryManager /// /// An error is returned if the MemoryManager is not a CudaMemoryManager. -ARROW_EXPORT +ARROW_CUDA_EXPORT Result> AsCudaMemoryManager( const std::shared_ptr& mm); /// \class CudaContext /// \brief Object-oriented interface to the low-level CUDA driver API -class ARROW_EXPORT CudaContext : public std::enable_shared_from_this { +class ARROW_CUDA_EXPORT CudaContext : public std::enable_shared_from_this { public: ~CudaContext(); diff --git a/cpp/src/arrow/gpu/cuda_internal.h b/cpp/src/arrow/gpu/cuda_internal.h index d70873634f8..71a9a8d8a05 100644 --- a/cpp/src/arrow/gpu/cuda_internal.h +++ b/cpp/src/arrow/gpu/cuda_internal.h @@ -25,6 +25,7 @@ #include #include "arrow/gpu/cuda_context.h" +#include "arrow/gpu/visibility.h" #include "arrow/status.h" namespace arrow { @@ -33,7 +34,7 @@ namespace internal { std::string CudaErrorDescription(CUresult err); -ARROW_EXPORT +ARROW_CUDA_EXPORT Status StatusFromCuda(CUresult res, const char* function_name = nullptr); #define CU_RETURN_NOT_OK(FUNC_NAME, STMT) \ diff --git a/cpp/src/arrow/gpu/cuda_memory.h b/cpp/src/arrow/gpu/cuda_memory.h index 488f4183730..ce4b4b11cb2 100644 --- a/cpp/src/arrow/gpu/cuda_memory.h +++ b/cpp/src/arrow/gpu/cuda_memory.h @@ -22,6 +22,7 @@ #include "arrow/buffer.h" #include "arrow/c/abi.h" +#include "arrow/gpu/visibility.h" #include "arrow/io/concurrency.h" #include "arrow/type_fwd.h" @@ -35,7 +36,7 @@ class CudaIpcMemHandle; /// \brief An Arrow buffer located on a GPU device /// /// Be careful using this in any Arrow code which may not be GPU-aware -class ARROW_EXPORT CudaBuffer : public Buffer { +class ARROW_CUDA_EXPORT CudaBuffer : public Buffer { public: // XXX deprecate? CudaBuffer(uint8_t* data, int64_t size, const std::shared_ptr& context, @@ -109,7 +110,7 @@ class ARROW_EXPORT CudaBuffer : public Buffer { /// \class CudaHostBuffer /// \brief Device-accessible CPU memory created using cudaHostAlloc -class ARROW_EXPORT CudaHostBuffer : public MutableBuffer { +class ARROW_CUDA_EXPORT CudaHostBuffer : public MutableBuffer { public: CudaHostBuffer(uint8_t* data, const int64_t size); @@ -121,7 +122,7 @@ class ARROW_EXPORT CudaHostBuffer : public MutableBuffer { /// \class CudaIpcHandle /// \brief A container for a CUDA IPC handle -class ARROW_EXPORT CudaIpcMemHandle { +class ARROW_CUDA_EXPORT CudaIpcMemHandle { public: ~CudaIpcMemHandle(); @@ -158,7 +159,7 @@ class ARROW_EXPORT CudaIpcMemHandle { /// pointing to CPU memory. /// Reading to a raw pointer, though, copies device memory into the host /// memory pointed to. -class ARROW_EXPORT CudaBufferReader +class ARROW_CUDA_EXPORT CudaBufferReader : public ::arrow::io::internal::RandomAccessFileConcurrencyWrapper { public: explicit CudaBufferReader(const std::shared_ptr& buffer); @@ -200,7 +201,7 @@ class ARROW_EXPORT CudaBufferReader /// \class CudaBufferWriter /// \brief File interface for writing to CUDA buffers, with optional buffering -class ARROW_EXPORT CudaBufferWriter : public io::WritableFile { +class ARROW_CUDA_EXPORT CudaBufferWriter : public io::WritableFile { public: explicit CudaBufferWriter(const std::shared_ptr& buffer); ~CudaBufferWriter() override; @@ -247,17 +248,17 @@ class ARROW_EXPORT CudaBufferWriter : public io::WritableFile { /// \param[in] device_number device to expose host memory /// \param[in] size number of bytes /// \return Host buffer or Status -ARROW_EXPORT +ARROW_CUDA_EXPORT Result> AllocateCudaHostBuffer(int device_number, const int64_t size); /// Low-level: get a device address through which the CPU data be accessed. -ARROW_EXPORT +ARROW_CUDA_EXPORT Result GetDeviceAddress(const uint8_t* cpu_data, const std::shared_ptr& ctx); /// Low-level: get a CPU address through which the device data be accessed. -ARROW_EXPORT +ARROW_CUDA_EXPORT Result GetHostAddress(uintptr_t device_ptr); ARROW_DEPRECATED( From 6ce6a12f0487bb6ce4e8150ecd6dc76b7ced8519 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 4 Apr 2025 09:19:47 +0200 Subject: [PATCH 2/4] Add missing new file to previous commit :) --- cpp/src/arrow/gpu/visibility.h | 49 ++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 cpp/src/arrow/gpu/visibility.h diff --git a/cpp/src/arrow/gpu/visibility.h b/cpp/src/arrow/gpu/visibility.h new file mode 100644 index 00000000000..3ba75a12bdc --- /dev/null +++ b/cpp/src/arrow/gpu/visibility.h @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#if defined(_WIN32) || defined(__CYGWIN__) +# if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4251) +# else +# pragma GCC diagnostic ignored "-Wattributes" +# endif + +# ifdef ARROW_CUDA_STATIC +# define ARROW_CUDA_EXPORT +# elif defined(ARROW_CUDA_EXPORTING) +# define ARROW_CUDA_EXPORT __declspec(dllexport) +# else +# define ARROW_CUDA_EXPORT __declspec(dllimport) +# endif + +# define ARROW_CUDA_NO_EXPORT + +# if defined(_MSC_VER) +# pragma warning(pop) +# endif + +#else // Not Windows +# ifndef ARROW_CUDA_EXPORT +# define ARROW_CUDA_EXPORT __attribute__((visibility("default"))) +# endif +# ifndef ARROW_CUDA_NO_EXPORT +# define ARROW_CUDA_NO_EXPORT __attribute__((visibility("hidden"))) +# endif +#endif From f60ca031390ea1c5b1ddb367daa835bcd10083e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 4 Apr 2025 12:29:26 +0200 Subject: [PATCH 3/4] Add new flags needed for pkg-config when static linking the library --- cpp/src/arrow/gpu/arrow-cuda.pc.in | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/gpu/arrow-cuda.pc.in b/cpp/src/arrow/gpu/arrow-cuda.pc.in index 173d7d91ef9..e3a4d7d6a14 100644 --- a/cpp/src/arrow/gpu/arrow-cuda.pc.in +++ b/cpp/src/arrow/gpu/arrow-cuda.pc.in @@ -25,3 +25,4 @@ Version: @ARROW_VERSION@ Requires: arrow cuda Libs: -L${libdir} -larrow_cuda Cflags: -I${includedir} +Cflags.private: -DARROW_CUDA_STATIC From 9aa1470710dc85d8299ce40c7e73c8393451c050 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 7 Apr 2025 12:27:27 +0200 Subject: [PATCH 4/4] Fix Cflags and Cflags.private to include correct flags when static linking even when only libarrow_cida.a is being built --- cpp/CMakeLists.txt | 7 +++++++ cpp/src/arrow/gpu/CMakeLists.txt | 11 +++++++++++ cpp/src/arrow/gpu/arrow-cuda.pc.in | 4 ++-- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d1b7f3f31bd..d6ecd2a355f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -553,6 +553,13 @@ if(ARROW_BUILD_STATIC) string(APPEND ARROW_ACERO_PC_CFLAGS_PRIVATE " -DARROW_ACERO_STATIC") endif() +# For arrow-cuda.pc. +set(ARROW_CUDA_PC_CFLAGS "") +set(ARROW_CUDA_PC_CFLAGS_PRIVATE "") +if(ARROW_BUILD_STATIC) + string(APPEND ARROW_CUDA_PC_CFLAGS_PRIVATE " -DARROW_CUDA_STATIC") +endif() + # For arrow-dataset.pc. set(ARROW_DATASET_PC_CFLAGS "") set(ARROW_DATASET_PC_CFLAGS_PRIVATE "") diff --git a/cpp/src/arrow/gpu/CMakeLists.txt b/cpp/src/arrow/gpu/CMakeLists.txt index f9ebb14be75..9723cea7c23 100644 --- a/cpp/src/arrow/gpu/CMakeLists.txt +++ b/cpp/src/arrow/gpu/CMakeLists.txt @@ -45,6 +45,17 @@ else() set(ARROW_CUDA_SHARED_LINK_LIBS CUDA::cuda_driver) endif() +# If only libarrow_cuda.a is built, "pkg-config --cflags --libs +# arrow-cuda" will include build flags for static linking not shared +# linking. +# The variables ARROW_CUDA_PC_* (except ARROW_CUDA_PC_*_PRIVATE) are used +# specifically for the static linking case to ensure the correct flags +# are provided. +if(NOT ARROW_BUILD_SHARED AND ARROW_BUILD_STATIC) + string(APPEND ARROW_CUDA_PC_CFLAGS "${ARROW_CUDA_PC_CFLAGS_PRIVATE}") + set(ARROW_CUDA_PC_CFLAGS_PRIVATE "") +endif() + set(ARROW_CUDA_SRCS cuda_arrow_ipc.cc cuda_context.cc cuda_internal.cc cuda_memory.cc) set(ARROW_CUDA_PKG_CONFIG_NAME_ARGS) diff --git a/cpp/src/arrow/gpu/arrow-cuda.pc.in b/cpp/src/arrow/gpu/arrow-cuda.pc.in index e3a4d7d6a14..5a2b13fff55 100644 --- a/cpp/src/arrow/gpu/arrow-cuda.pc.in +++ b/cpp/src/arrow/gpu/arrow-cuda.pc.in @@ -24,5 +24,5 @@ Description: CUDA integration library for Apache Arrow Version: @ARROW_VERSION@ Requires: arrow cuda Libs: -L${libdir} -larrow_cuda -Cflags: -I${includedir} -Cflags.private: -DARROW_CUDA_STATIC +Cflags: -I${includedir}@ARROW_CUDA_PC_CFLAGS@ +Cflags.private:@ARROW_CUDA_PC_CFLAGS_PRIVATE@