diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 5ccefa32725..6b381e15b89 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -141,7 +141,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker key: ${{ matrix.image }}-${{ hashFiles('cpp/**') }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 83f835d588a..b910bab457b 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -46,7 +46,7 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker key: debian-docs-${{ hashFiles('cpp/**') }} diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml index 0e23394e8a4..1428c8392b9 100644 --- a/.github/workflows/docs_light.yml +++ b/.github/workflows/docs_light.yml @@ -53,7 +53,7 @@ jobs: with: fetch-depth: 0 - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker key: conda-docs-${{ hashFiles('cpp/**') }} diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index aef81df0748..e82d51e52f6 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -100,7 +100,7 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker key: conda-${{ hashFiles('cpp/**') }} diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index ba05fab65ad..673beb0b1a2 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -108,7 +108,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }} diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index b567773ecb9..557a9d5f369 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -145,7 +145,7 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker # As this key is identical on both matrix builds only one will be able to successfully cache, diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml index 4fcb399c91f..2a1ff8eb08a 100644 --- a/.github/workflows/r_nightly.yml +++ b/.github/workflows/r_nightly.yml @@ -86,7 +86,7 @@ jobs: exit 1 fi - name: Cache Repo - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: repo key: r-nightly-${{ github.run_id }} diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 589b74cd687..a362616afe6 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -84,7 +84,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }} diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index abfe6d274f7..1d71283063e 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -967,7 +967,8 @@ set(EP_COMMON_CMAKE_ARGS -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=${CMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY} -DCMAKE_INSTALL_LIBDIR=lib -DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT} - -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE}) + -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE} + -DCMAKE_POLICY_VERSION_MINIMUM=3.5) # if building with a toolchain file, pass that through if(CMAKE_TOOLCHAIN_FILE) @@ -1033,6 +1034,8 @@ macro(prepare_fetchcontent) set(CMAKE_COMPILE_WARNING_AS_ERROR FALSE) set(CMAKE_EXPORT_NO_PACKAGE_REGISTRY TRUE) set(CMAKE_MACOSX_RPATH ${ARROW_INSTALL_NAME_RPATH}) + set(CMAKE_POLICY_VERSION_MINIMUM 3.5) + if(MSVC) string(REPLACE "/WX" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") string(REPLACE "/WX" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") diff --git a/r/DESCRIPTION b/r/DESCRIPTION index a31e832519d..ffcacbf9ef0 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,6 +1,6 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' -Version: 19.0.1 +Version: 19.0.1.1 Authors@R: c( person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = c("aut")), person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")), diff --git a/r/NEWS.md b/r/NEWS.md index 36f3bd3496a..e8893e125d5 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -17,6 +17,13 @@ under the License. --> +# arrow 19.0.1.1 + +## Minor improvements and fixes + +- Updated internal code to comply with new CRAN requirements on non-API calls ([#45949](https://github.com/apache/arrow/issues/45949)) +- Enable building the bundled third-party libraries under CMake 4.0 ([#45987](https://github.com/apache/arrow/issues/45987)) + # arrow 19.0.1 This release primarily updates the underlying Arrow C++ version used by the diff --git a/r/src/Makevars.in b/r/src/Makevars.in index 0516d297263..7a09d6cff3a 100644 --- a/r/src/Makevars.in +++ b/r/src/Makevars.in @@ -27,3 +27,8 @@ PKG_CPPFLAGS=@cflags@ # PKG_CXXFLAGS=$(CXX_VISIBILITY) CXX_STD=CXX17 PKG_LIBS=@libs@ + +all: $(SHLIB) purify + +purify: $(SHLIB) + @rm -rf ../libarrow || true diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 90a459e19cb..a4adf58997c 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -115,7 +115,7 @@ const std::shared_ptr& GetChunkedArray(SEXP alt) { // materialization is needed. // data2: starts as NULL, and becomes a standard R vector with the same // data if necessary: if materialization is needed, e.g. if we need -// to access its data pointer, with DATAPTR(). +// to access its data pointer, with INTEGER(), REAL(), etc. template struct AltrepVectorBase { // store the Array as an external pointer in data1, mark as immutable @@ -220,7 +220,14 @@ struct AltrepVectorPrimitive : public AltrepVectorBase(DATAPTR(copy))); + if constexpr (std::is_same_v) { + Get_region(alt, 0, size, REAL(copy)); + } else if constexpr (std::is_same_v) { + Get_region(alt, 0, size, INTEGER(copy)); + } else { + static_assert(std::is_same_v || std::is_same_v, + "ALTREP not implemented for this c_type"); + } // store as data2, this is now considered materialized SetRepresentation(alt, copy); @@ -269,13 +276,27 @@ struct AltrepVectorPrimitive : public AltrepVectorBase) { + return REAL(Materialize(alt)); + } else if constexpr (std::is_same_v) { + return INTEGER(Materialize(alt)); + } else { + static_assert(std::is_same_v || std::is_same_v, + "ALTREP not implemented for this c_type"); + } } // The value at position i static c_type Elt(SEXP alt, R_xlen_t i) { if (IsMaterialized(alt)) { - return reinterpret_cast(DATAPTR(Representation(alt)))[i]; + if constexpr (std::is_same_v) { + return REAL(Representation(alt))[i]; + } else if constexpr (std::is_same_v) { + return INTEGER(Representation(alt))[i]; + } else { + static_assert(std::is_same_v || std::is_same_v, + "ALTREP not implemented for this c_type"); + } } auto altrep_data = @@ -531,7 +552,7 @@ struct AltrepFactor : public AltrepVectorBase { SEXP copy = PROTECT(Rf_allocVector(INTSXP, size)); // copy the data from the array, through Get_region - Get_region(alt, 0, size, reinterpret_cast(DATAPTR(copy))); + Get_region(alt, 0, size, INTEGER(copy)); // store as data2, this is now considered materialized SetRepresentation(alt, copy); @@ -552,7 +573,7 @@ struct AltrepFactor : public AltrepVectorBase { return nullptr; } - static void* Dataptr(SEXP alt, Rboolean writeable) { return DATAPTR(Materialize(alt)); } + static void* Dataptr(SEXP alt, Rboolean writeable) { return INTEGER(Materialize(alt)); } static SEXP Duplicate(SEXP alt, Rboolean /* deep */) { // the representation integer vector @@ -892,7 +913,9 @@ struct AltrepVectorString : public AltrepVectorBase> { return s; } - static void* Dataptr(SEXP alt, Rboolean writeable) { return DATAPTR(Materialize(alt)); } + static void* Dataptr(SEXP alt, Rboolean writeable) { + return const_cast(DATAPTR_RO(Materialize(alt))); + } static SEXP Materialize(SEXP alt) { if (Base::IsMaterialized(alt)) { @@ -931,7 +954,9 @@ struct AltrepVectorString : public AltrepVectorBase> { } static const void* Dataptr_or_null(SEXP alt) { - if (Base::IsMaterialized(alt)) return DATAPTR(Representation(alt)); + if (Base::IsMaterialized(alt)) { + return DATAPTR_RO(alt); + } // otherwise give up return nullptr; @@ -1267,21 +1292,14 @@ sexp test_arrow_altrep_copy_by_dataptr(sexp x) { if (TYPEOF(x) == INTSXP) { cpp11::writable::integers out(Rf_xlength(x)); - int* ptr = reinterpret_cast(DATAPTR(x)); + int* ptr = INTEGER(x); for (R_xlen_t i = 0; i < n; i++) { out[i] = ptr[i]; } return out; } else if (TYPEOF(x) == REALSXP) { cpp11::writable::doubles out(Rf_xlength(x)); - double* ptr = reinterpret_cast(DATAPTR(x)); - for (R_xlen_t i = 0; i < n; i++) { - out[i] = ptr[i]; - } - return out; - } else if (TYPEOF(x) == STRSXP) { - cpp11::writable::strings out(Rf_xlength(x)); - SEXP* ptr = reinterpret_cast(DATAPTR(x)); + double* ptr = REAL(x); for (R_xlen_t i = 0; i < n; i++) { out[i] = ptr[i]; } diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 05c8f6062da..9da02fd09fd 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -173,7 +173,7 @@ template class RBuffer : public MutableBuffer { public: explicit RBuffer(RVector vec) - : MutableBuffer(reinterpret_cast(DATAPTR(vec)), + : MutableBuffer(reinterpret_cast(getDataPointer(vec)), vec.size() * sizeof(typename RVector::value_type), arrow::CPUDevice::memory_manager(gc_memory_pool())), vec_(vec) {} @@ -181,6 +181,24 @@ class RBuffer : public MutableBuffer { private: // vec_ holds the memory RVector vec_; + + static void* getDataPointer(RVector& vec) { + if (TYPEOF(vec) == LGLSXP) { + return LOGICAL(vec); + } else if (TYPEOF(vec) == INTSXP) { + return INTEGER(vec); + } else if (TYPEOF(vec) == REALSXP) { + return REAL(vec); + } else if (TYPEOF(vec) == CPLXSXP) { + return COMPLEX(vec); + } else if (TYPEOF(vec) == STRSXP) { + // We don't want to expose the string data here, so we error + cpp11::stop("Operation not supported for string vectors."); + } else { + // raw + return RAW(vec); + } + } }; std::shared_ptr InferArrowTypeFromFactor(SEXP); diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index d2db11e14a7..32e186ceac7 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -1214,11 +1214,11 @@ bool can_reuse_memory(SEXP x, const std::shared_ptr& type) { // because MakeSimpleArray below will force materialization switch (type->id()) { case Type::INT32: - return TYPEOF(x) == INTSXP && !OBJECT(x); + return TYPEOF(x) == INTSXP && !Rf_isObject(x); case Type::DOUBLE: - return TYPEOF(x) == REALSXP && !OBJECT(x); + return TYPEOF(x) == REALSXP && !Rf_isObject(x); case Type::INT8: - return TYPEOF(x) == RAWSXP && !OBJECT(x); + return TYPEOF(x) == RAWSXP && !Rf_isObject(x); case Type::INT64: return TYPEOF(x) == REALSXP && Rf_inherits(x, "integer64"); default: @@ -1412,17 +1412,17 @@ bool vector_from_r_memory(SEXP x, const std::shared_ptr& type, switch (type->id()) { case Type::INT32: - return TYPEOF(x) == INTSXP && !OBJECT(x) && + return TYPEOF(x) == INTSXP && !Rf_isObject(x) && vector_from_r_memory_impl(x, type, columns, j, tasks); case Type::DOUBLE: - return TYPEOF(x) == REALSXP && !OBJECT(x) && + return TYPEOF(x) == REALSXP && !Rf_isObject(x) && vector_from_r_memory_impl(x, type, columns, j, tasks); case Type::UINT8: - return TYPEOF(x) == RAWSXP && !OBJECT(x) && + return TYPEOF(x) == RAWSXP && !Rf_isObject(x) && vector_from_r_memory_impl(x, type, columns, j, tasks); diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R index 50bd40988e5..d1e90b6b59a 100644 --- a/r/tests/testthat/test-altrep.R +++ b/r/tests/testthat/test-altrep.R @@ -170,7 +170,7 @@ test_that("element access methods for int32 ALTREP with nulls", { expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original) expect_false(test_arrow_altrep_is_materialized(altrep)) - # because there are no nulls, DATAPTR() does not materialize + # because there are nulls, DATAPTR() does materialize expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original) expect_true(test_arrow_altrep_is_materialized(altrep)) @@ -193,7 +193,7 @@ test_that("element access methods for double ALTREP with nulls", { expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original) expect_false(test_arrow_altrep_is_materialized(altrep)) - # because there are no nulls, DATAPTR() does not materialize + # because there are nulls, DATAPTR() does materialize expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original) expect_true(test_arrow_altrep_is_materialized(altrep)) @@ -244,14 +244,13 @@ test_that("element access methods for character ALTREP", { expect_identical(test_arrow_altrep_copy_by_element(altrep), original) expect_false(test_arrow_altrep_is_materialized(altrep)) - # DATAPTR() should always materialize for strings - expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original) + # match() calls DATAPTR() internally which materializes the vector + match(altrep, c("1", "40", "999")) expect_true(test_arrow_altrep_is_materialized(altrep)) # test element access after materialization expect_true(test_arrow_altrep_is_materialized(altrep)) expect_identical(test_arrow_altrep_copy_by_element(altrep), original) - expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original) }) test_that("element access methods for character ALTREP from large_utf8()", { @@ -265,14 +264,13 @@ test_that("element access methods for character ALTREP from large_utf8()", { expect_identical(test_arrow_altrep_copy_by_element(altrep), original) expect_false(test_arrow_altrep_is_materialized(altrep)) - # DATAPTR() should always materialize for strings - expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original) + # match() calls DATAPTR() internally which materializes the vector + match(altrep, c("1", "40", "999")) expect_true(test_arrow_altrep_is_materialized(altrep)) # test element access after materialization expect_true(test_arrow_altrep_is_materialized(altrep)) expect_identical(test_arrow_altrep_copy_by_element(altrep), original) - expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original) }) test_that("empty vectors are not altrep", {