Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion r/R/List.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@
#' @include R6.R

`arrow::ListType` <- R6Class("arrow::ListType",
inherit = `arrow::NestedType`
inherit = `arrow::NestedType`,
active = list(
value_field = function() shared_ptr(`arrow::Field`, ListType__value_field(self)),
value_type = function() `arrow::DataType`$dispatch(ListType__value_type(self))
)
)

#' @rdname DataType
Expand Down
14 changes: 14 additions & 0 deletions r/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,26 @@
)
)

`arrow::ListArray` <- R6Class("arrow::ListArray", inherit = `arrow::Array`,
public = list(
values = function() `arrow::Array`$dispatch(ListArray__values(self)),
value_length = function(i) ListArray__value_length(self, i),
value_offset = function(i) ListArray__value_offset(self, i),
raw_value_offsets = function() ListArray__raw_value_offsets(self)
),
active = list(
value_type = function() `arrow::DataType`$dispatch(ListArray__value_type(self))
)
)

`arrow::Array`$dispatch <- function(xp){
a <- shared_ptr(`arrow::Array`, xp)
if(a$type_id() == Type$DICTIONARY){
a <- shared_ptr(`arrow::DictionaryArray`, xp)
} else if (a$type_id() == Type$STRUCT) {
a <- shared_ptr(`arrow::StructArray`, xp)
} else if(a$type_id() == Type$LIST) {
a <- shared_ptr(`arrow::ListArray`, xp)
}
a
}
Expand Down
28 changes: 28 additions & 0 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions r/src/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,35 @@ arrow::ArrayVector StructArray__Flatten(
return out;
}

// [[arrow::export]]
std::shared_ptr<arrow::DataType> ListArray__value_type(
const std::shared_ptr<arrow::ListArray>& array) {
return array->value_type();
}

// [[arrow::export]]
std::shared_ptr<arrow::Array> ListArray__values(
const std::shared_ptr<arrow::ListArray>& array) {
return array->values();
}

// [[arrow::export]]
int32_t ListArray__value_length(const std::shared_ptr<arrow::ListArray>& array,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are "low-level" functions, they don't perform any bound checking, you can segfault with the wrong i. Do they need to be exported?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤷‍♂ what's the rule about what is export worthy ?

int64_t i) {
return array->value_length(i);
}

// [[arrow::export]]
int32_t ListArray__value_offset(const std::shared_ptr<arrow::ListArray>& array,
int64_t i) {
return array->value_offset(i);
}

// [[arrow::export]]
Rcpp::IntegerVector ListArray__raw_value_offsets(
const std::shared_ptr<arrow::ListArray>& array) {
auto offsets = array->raw_value_offsets();
return Rcpp::IntegerVector(offsets, offsets + array->length());
}

#endif
45 changes: 45 additions & 0 deletions r/src/array__to_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,47 @@ class Converter_Decimal : public Converter {
}
};

class Converter_List : public Converter {
public:
explicit Converter_List(const ArrayVector& arrays) : Converter(arrays) {}

SEXP Allocate(R_xlen_t n) const { return Rcpp::List(no_init(n)); }

Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const {
// nothing to do, list contain NULL by default
return Status::OK();
}

Status Ingest_some_nulls(SEXP data, const std::shared_ptr<arrow::Array>& array,
R_xlen_t start, R_xlen_t n) const {
using internal::checked_cast;
auto list_array = checked_cast<arrow::ListArray*>(array.get());
auto values_array = list_array->values();

auto ingest_one = [&](R_xlen_t i) {
auto slice =
values_array->Slice(list_array->value_offset(i), list_array->value_length(i));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a relatively costly thing to do. I'm not sure how we can escape it. Do you keep a reference to the generated Slice/Array, or does it get consumed and transformed in something else. See #4366 (comment) for reference.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

::Allocate() makes an R list, and the various ::Ingest.*() fill that list.

I'll look if/how I can do this without actually calling Slice(), might be easy enough to just scan through values_array I guess.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The challenging part is dealing with the bitmap and null_count.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be a huge timesink, I think it's ok for now, but be wary that this will be a problem on large Array.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. I don't think it's too much trouble to skip using Slice and use the bitmap of the values_array etc ... I'll have a look.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 yeah it's about the Array__as_vector(slice) which needs slice as a proper array already. The Converter api is designed to ingest all of an array, so I guess it would need some extra methods to only ingest a slice of an array without. I'll open another issue for that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SET_VECTOR_ELT(data, i + start, Array__as_vector(slice));
};

if (array->null_count()) {
internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), array->offset(),
n);

for (R_xlen_t i = 0; i < n; i++, bitmap_reader.Next()) {
if (bitmap_reader.IsSet()) ingest_one(i);
}

} else {
for (R_xlen_t i = 0; i < n; i++) {
ingest_one(i);
}
}

return Status::OK();
}
};

class Converter_Int64 : public Converter {
public:
explicit Converter_Int64(const ArrayVector& arrays) : Converter(arrays) {}
Expand Down Expand Up @@ -658,9 +699,13 @@ std::shared_ptr<Converter> Converter::Make(const ArrayVector& arrays) {
case Type::DECIMAL:
return std::make_shared<arrow::r::Converter_Decimal>(arrays);

// nested
case Type::STRUCT:
return std::make_shared<arrow::r::Converter_Struct>(arrays);

case Type::LIST:
return std::make_shared<arrow::r::Converter_List>(arrays);

default:
break;
}
Expand Down
114 changes: 114 additions & 0 deletions r/src/arrowExports.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions r/src/datatype.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,4 +281,16 @@ int StructType__GetFieldIndex(const std::shared_ptr<arrow::StructType>& type,
return type->GetFieldIndex(name);
}

// [[arrow::export]]
std::shared_ptr<arrow::Field> ListType__value_field(
const std::shared_ptr<arrow::ListType>& type) {
return type->value_field();
}

// [[arrow::export]]
std::shared_ptr<arrow::DataType> ListType__value_type(
const std::shared_ptr<arrow::ListType>& type) {
return type->value_type();
}

#endif
2 changes: 2 additions & 0 deletions r/tests/testthat/test-DataType.R
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ test_that("list type works as expected", {
x$children(),
list(field("item", int32()))
)
expect_equal(x$value_type, int32())
expect_equal(x$value_field, field("item", int32()))
})

test_that("struct type works as expected", {
Expand Down
Loading