Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ jobs:
fetch-depth: 0
submodules: recursive
- name: Cache Docker Volumes
uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
uses: actions/cache@v4
with:
path: .docker
key: ${{ matrix.image }}-${{ hashFiles('cpp/**') }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
run: |
ci/scripts/util_free_space.sh
- name: Cache Docker Volumes
uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
uses: actions/cache@v4
with:
path: .docker
key: debian-docs-${{ hashFiles('cpp/**') }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docs_light.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
with:
fetch-depth: 0
- name: Cache Docker Volumes
uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
uses: actions/cache@v4
with:
path: .docker
key: conda-docs-${{ hashFiles('cpp/**') }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ jobs:
run: |
ci/scripts/util_free_space.sh
- name: Cache Docker Volumes
uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
uses: actions/cache@v4
with:
path: .docker
key: conda-${{ hashFiles('cpp/**') }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ jobs:
fetch-depth: 0
submodules: recursive
- name: Cache Docker Volumes
uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
uses: actions/cache@v4
with:
path: .docker
key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ jobs:
run: |
ci/scripts/util_free_space.sh
- name: Cache Docker Volumes
uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
uses: actions/cache@v4
with:
path: .docker
# As this key is identical on both matrix builds only one will be able to successfully cache,
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/r_nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
exit 1
fi
- name: Cache Repo
uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
uses: actions/cache@v4
with:
path: repo
key: r-nightly-${{ github.run_id }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ruby.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ jobs:
fetch-depth: 0
submodules: recursive
- name: Cache Docker Volumes
uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
uses: actions/cache@v4
with:
path: .docker
key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }}
Expand Down
5 changes: 4 additions & 1 deletion cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -967,7 +967,8 @@ set(EP_COMMON_CMAKE_ARGS
-DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=${CMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY}
-DCMAKE_INSTALL_LIBDIR=lib
-DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT}
-DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE})
-DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE}
-DCMAKE_POLICY_VERSION_MINIMUM=3.5)

# if building with a toolchain file, pass that through
if(CMAKE_TOOLCHAIN_FILE)
Expand Down Expand Up @@ -1033,6 +1034,8 @@ macro(prepare_fetchcontent)
set(CMAKE_COMPILE_WARNING_AS_ERROR FALSE)
set(CMAKE_EXPORT_NO_PACKAGE_REGISTRY TRUE)
set(CMAKE_MACOSX_RPATH ${ARROW_INSTALL_NAME_RPATH})
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)

if(MSVC)
string(REPLACE "/WX" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}")
string(REPLACE "/WX" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
Expand Down
2 changes: 1 addition & 1 deletion r/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: arrow
Title: Integration to 'Apache' 'Arrow'
Version: 19.0.1
Version: 19.0.1.1
Authors@R: c(
person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = c("aut")),
person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")),
Expand Down
7 changes: 7 additions & 0 deletions r/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@
under the License.
-->

# arrow 19.0.1.1

## Minor improvements and fixes

- Updated internal code to comply with new CRAN requirements on non-API calls ([#45949](https://github.com/apache/arrow/issues/45949))
- Enable building the bundled third-party libraries under CMake 4.0 ([#45987](https://github.com/apache/arrow/issues/45987))
Comment on lines +22 to +25
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for these! Good summary


# arrow 19.0.1

This release primarily updates the underlying Arrow C++ version used by the
Expand Down
5 changes: 5 additions & 0 deletions r/src/Makevars.in
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,8 @@ PKG_CPPFLAGS=@cflags@
# PKG_CXXFLAGS=$(CXX_VISIBILITY)
CXX_STD=CXX17
PKG_LIBS=@libs@

all: $(SHLIB) purify

purify: $(SHLIB)
@rm -rf ../libarrow || true
52 changes: 35 additions & 17 deletions r/src/altrep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ const std::shared_ptr<ChunkedArray>& GetChunkedArray(SEXP alt) {
// materialization is needed.
// data2: starts as NULL, and becomes a standard R vector with the same
// data if necessary: if materialization is needed, e.g. if we need
// to access its data pointer, with DATAPTR().
// to access its data pointer, with INTEGER(), REAL(), etc.
template <typename Impl>
struct AltrepVectorBase {
// store the Array as an external pointer in data1, mark as immutable
Expand Down Expand Up @@ -220,7 +220,14 @@ struct AltrepVectorPrimitive : public AltrepVectorBase<AltrepVectorPrimitive<sex
SEXP copy = PROTECT(Rf_allocVector(sexp_type, size));

// copy the data from the array, through Get_region
Get_region(alt, 0, size, reinterpret_cast<c_type*>(DATAPTR(copy)));
if constexpr (std::is_same_v<c_type, double>) {
Get_region(alt, 0, size, REAL(copy));
} else if constexpr (std::is_same_v<c_type, int>) {
Get_region(alt, 0, size, INTEGER(copy));
} else {
static_assert(std::is_same_v<c_type, double> || std::is_same_v<c_type, int>,
"ALTREP not implemented for this c_type");
}

// store as data2, this is now considered materialized
SetRepresentation(alt, copy);
Expand Down Expand Up @@ -269,13 +276,27 @@ struct AltrepVectorPrimitive : public AltrepVectorBase<AltrepVectorPrimitive<sex
}

// Otherwise we have to materialize and hand the pointer to data2
return DATAPTR(Materialize(alt));
if constexpr (std::is_same_v<c_type, double>) {
return REAL(Materialize(alt));
} else if constexpr (std::is_same_v<c_type, int>) {
return INTEGER(Materialize(alt));
} else {
static_assert(std::is_same_v<c_type, double> || std::is_same_v<c_type, int>,
"ALTREP not implemented for this c_type");
}
}

// The value at position i
static c_type Elt(SEXP alt, R_xlen_t i) {
if (IsMaterialized(alt)) {
return reinterpret_cast<c_type*>(DATAPTR(Representation(alt)))[i];
if constexpr (std::is_same_v<c_type, double>) {
return REAL(Representation(alt))[i];
} else if constexpr (std::is_same_v<c_type, int>) {
return INTEGER(Representation(alt))[i];
} else {
static_assert(std::is_same_v<c_type, double> || std::is_same_v<c_type, int>,
"ALTREP not implemented for this c_type");
}
}

auto altrep_data =
Expand Down Expand Up @@ -531,7 +552,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
SEXP copy = PROTECT(Rf_allocVector(INTSXP, size));

// copy the data from the array, through Get_region
Get_region(alt, 0, size, reinterpret_cast<int*>(DATAPTR(copy)));
Get_region(alt, 0, size, INTEGER(copy));

// store as data2, this is now considered materialized
SetRepresentation(alt, copy);
Expand All @@ -552,7 +573,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
return nullptr;
}

static void* Dataptr(SEXP alt, Rboolean writeable) { return DATAPTR(Materialize(alt)); }
static void* Dataptr(SEXP alt, Rboolean writeable) { return INTEGER(Materialize(alt)); }

static SEXP Duplicate(SEXP alt, Rboolean /* deep */) {
// the representation integer vector
Expand Down Expand Up @@ -892,7 +913,9 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
return s;
}

static void* Dataptr(SEXP alt, Rboolean writeable) { return DATAPTR(Materialize(alt)); }
static void* Dataptr(SEXP alt, Rboolean writeable) {
return const_cast<void*>(DATAPTR_RO(Materialize(alt)));
}

static SEXP Materialize(SEXP alt) {
if (Base::IsMaterialized(alt)) {
Expand Down Expand Up @@ -931,7 +954,9 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
}

static const void* Dataptr_or_null(SEXP alt) {
if (Base::IsMaterialized(alt)) return DATAPTR(Representation(alt));
if (Base::IsMaterialized(alt)) {
return DATAPTR_RO(alt);
}

// otherwise give up
return nullptr;
Expand Down Expand Up @@ -1267,21 +1292,14 @@ sexp test_arrow_altrep_copy_by_dataptr(sexp x) {

if (TYPEOF(x) == INTSXP) {
cpp11::writable::integers out(Rf_xlength(x));
int* ptr = reinterpret_cast<int*>(DATAPTR(x));
int* ptr = INTEGER(x);
for (R_xlen_t i = 0; i < n; i++) {
out[i] = ptr[i];
}
return out;
} else if (TYPEOF(x) == REALSXP) {
cpp11::writable::doubles out(Rf_xlength(x));
double* ptr = reinterpret_cast<double*>(DATAPTR(x));
for (R_xlen_t i = 0; i < n; i++) {
out[i] = ptr[i];
}
return out;
} else if (TYPEOF(x) == STRSXP) {
cpp11::writable::strings out(Rf_xlength(x));
SEXP* ptr = reinterpret_cast<SEXP*>(DATAPTR(x));
double* ptr = REAL(x);
for (R_xlen_t i = 0; i < n; i++) {
out[i] = ptr[i];
}
Expand Down
20 changes: 19 additions & 1 deletion r/src/arrow_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,32 @@ template <typename RVector>
class RBuffer : public MutableBuffer {
public:
explicit RBuffer(RVector vec)
: MutableBuffer(reinterpret_cast<uint8_t*>(DATAPTR(vec)),
: MutableBuffer(reinterpret_cast<uint8_t*>(getDataPointer(vec)),
vec.size() * sizeof(typename RVector::value_type),
arrow::CPUDevice::memory_manager(gc_memory_pool())),
vec_(vec) {}

private:
// vec_ holds the memory
RVector vec_;

static void* getDataPointer(RVector& vec) {
if (TYPEOF(vec) == LGLSXP) {
return LOGICAL(vec);
} else if (TYPEOF(vec) == INTSXP) {
return INTEGER(vec);
} else if (TYPEOF(vec) == REALSXP) {
return REAL(vec);
} else if (TYPEOF(vec) == CPLXSXP) {
return COMPLEX(vec);
} else if (TYPEOF(vec) == STRSXP) {
// We don't want to expose the string data here, so we error
cpp11::stop("Operation not supported for string vectors.");
} else {
// raw
return RAW(vec);
}
}
};

std::shared_ptr<arrow::DataType> InferArrowTypeFromFactor(SEXP);
Expand Down
12 changes: 6 additions & 6 deletions r/src/r_to_arrow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1214,11 +1214,11 @@ bool can_reuse_memory(SEXP x, const std::shared_ptr<arrow::DataType>& type) {
// because MakeSimpleArray below will force materialization
switch (type->id()) {
case Type::INT32:
return TYPEOF(x) == INTSXP && !OBJECT(x);
return TYPEOF(x) == INTSXP && !Rf_isObject(x);
case Type::DOUBLE:
return TYPEOF(x) == REALSXP && !OBJECT(x);
return TYPEOF(x) == REALSXP && !Rf_isObject(x);
case Type::INT8:
return TYPEOF(x) == RAWSXP && !OBJECT(x);
return TYPEOF(x) == RAWSXP && !Rf_isObject(x);
case Type::INT64:
return TYPEOF(x) == REALSXP && Rf_inherits(x, "integer64");
default:
Expand Down Expand Up @@ -1412,17 +1412,17 @@ bool vector_from_r_memory(SEXP x, const std::shared_ptr<DataType>& type,

switch (type->id()) {
case Type::INT32:
return TYPEOF(x) == INTSXP && !OBJECT(x) &&
return TYPEOF(x) == INTSXP && !Rf_isObject(x) &&
vector_from_r_memory_impl<cpp11::integers, Int32Type>(x, type, columns, j,
tasks);

case Type::DOUBLE:
return TYPEOF(x) == REALSXP && !OBJECT(x) &&
return TYPEOF(x) == REALSXP && !Rf_isObject(x) &&
vector_from_r_memory_impl<cpp11::doubles, DoubleType>(x, type, columns, j,
tasks);

case Type::UINT8:
return TYPEOF(x) == RAWSXP && !OBJECT(x) &&
return TYPEOF(x) == RAWSXP && !Rf_isObject(x) &&
vector_from_r_memory_impl<cpp11::raws, UInt8Type>(x, type, columns, j,
tasks);

Expand Down
14 changes: 6 additions & 8 deletions r/tests/testthat/test-altrep.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ test_that("element access methods for int32 ALTREP with nulls", {
expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
expect_false(test_arrow_altrep_is_materialized(altrep))

# because there are no nulls, DATAPTR() does not materialize
# because there are nulls, DATAPTR() does materialize
expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
expect_true(test_arrow_altrep_is_materialized(altrep))

Expand All @@ -193,7 +193,7 @@ test_that("element access methods for double ALTREP with nulls", {
expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
expect_false(test_arrow_altrep_is_materialized(altrep))

# because there are no nulls, DATAPTR() does not materialize
# because there are nulls, DATAPTR() does materialize
expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
expect_true(test_arrow_altrep_is_materialized(altrep))

Expand Down Expand Up @@ -244,14 +244,13 @@ test_that("element access methods for character ALTREP", {
expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
expect_false(test_arrow_altrep_is_materialized(altrep))

# DATAPTR() should always materialize for strings
expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
# match() calls DATAPTR() internally which materializes the vector
match(altrep, c("1", "40", "999"))
expect_true(test_arrow_altrep_is_materialized(altrep))

# test element access after materialization
expect_true(test_arrow_altrep_is_materialized(altrep))
expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
})

test_that("element access methods for character ALTREP from large_utf8()", {
Expand All @@ -265,14 +264,13 @@ test_that("element access methods for character ALTREP from large_utf8()", {
expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
expect_false(test_arrow_altrep_is_materialized(altrep))

# DATAPTR() should always materialize for strings
expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
# match() calls DATAPTR() internally which materializes the vector
match(altrep, c("1", "40", "999"))
expect_true(test_arrow_altrep_is_materialized(altrep))

# test element access after materialization
expect_true(test_arrow_altrep_is_materialized(altrep))
expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
})

test_that("empty vectors are not altrep", {
Expand Down
Loading