Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions r/R/ChunkedArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
`arrow::ChunkedArray` <- R6Class("arrow::ChunkedArray", inherit = `arrow::Object`,
public = list(
length = function() ChunkedArray__length(self),
chunk = function(i) shared_ptr(`arrow::Array`, ChunkedArray__chunk(self, i)),
chunk = function(i) `arrow::Array`$dispatch(ChunkedArray__chunk(self, i)),
as_vector = function() ChunkedArray__as_vector(self),
Slice = function(offset, length = NULL){
if (is.null(length)) {
Expand All @@ -50,7 +50,7 @@
active = list(
null_count = function() ChunkedArray__null_count(self),
num_chunks = function() ChunkedArray__num_chunks(self),
chunks = function() map(ChunkedArray__chunks(self), shared_ptr, class = `arrow::Array`),
chunks = function() map(ChunkedArray__chunks(self), ~ `arrow::Array`$dispatch(.x)),
type = function() `arrow::DataType`$dispatch(ChunkedArray__type(self))
)
)
Expand Down
9 changes: 7 additions & 2 deletions r/R/Struct.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,16 @@
#' @include R6.R

# R6 binding for arrow::StructType, layered on `arrow::NestedType`.
# GetFieldByName(name) wraps the result of StructType__GetFieldByName() as an
# `arrow::Field`; GetFieldIndex(name) returns StructType__GetFieldIndex() as is.
`arrow::StructType` <- R6Class("arrow::StructType",
inherit = `arrow::NestedType`
inherit = `arrow::NestedType`,
public = list(
GetFieldByName = function(name) shared_ptr(`arrow::Field`, StructType__GetFieldByName(self, name)),
GetFieldIndex = function(name) StructType__GetFieldIndex(self, name)
)
)

#' @rdname DataType
#' @export
struct <- function(...){
shared_ptr(`arrow::StructType`, struct_(.fields(list(...))))
# Build the struct type handle from the supplied fields first, then wrap it
# in the `arrow::StructType` R6 class.
xp <- struct_(.fields(list(...)))
shared_ptr(`arrow::StructType`, xp)
}
25 changes: 17 additions & 8 deletions r/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,27 @@
)
)

# R6 binding for arrow::DictionaryArray, extending `arrow::Array`.
# Both accessors pass the value returned by the DictionaryArray__* binding to
# `arrow::Array`$dispatch() so that it is wrapped in the matching R6 class.
`arrow::DictionaryArray` <- R6Class(
  "arrow::DictionaryArray",
  inherit = `arrow::Array`,
  public = list(
    indices = function() {
      `arrow::Array`$dispatch(DictionaryArray__indices(self))
    },
    dictionary = function() {
      `arrow::Array`$dispatch(DictionaryArray__dictionary(self))
    }
  )
)

# R6 binding for arrow::StructArray, extending `arrow::Array`.
# field(i) and GetFieldByName(name) return a single child array; Flatten()
# returns a list of arrays. Every array is wrapped via `arrow::Array`$dispatch().
`arrow::StructArray` <- R6Class(
  "arrow::StructArray",
  inherit = `arrow::Array`,
  public = list(
    field = function(i) {
      `arrow::Array`$dispatch(StructArray__field(self, i))
    },
    GetFieldByName = function(name) {
      `arrow::Array`$dispatch(StructArray__GetFieldByName(self, name))
    },
    Flatten = function() {
      map(StructArray__Flatten(self), function(child) `arrow::Array`$dispatch(child))
    }
  )
)

# Wrap an array handle in the most specific R6 class available.
#
# `xp` is first wrapped as a plain `arrow::Array` so that its type id can be
# read; dictionary and struct arrays are then re-wrapped in their dedicated
# classes. Any other type keeps the plain `arrow::Array` wrapper.
#
# NOTE(review): shared_ptr() appears to return NULL for a null handle (e.g. a
# struct field looked up by an unknown name) -- confirm. When that happens the
# NULL is passed through instead of failing on `a$type_id()`.
`arrow::Array`$dispatch <- function(xp){
  a <- shared_ptr(`arrow::Array`, xp)
  if (is.null(a)) {
    return(a)
  }
  # Read the type id once and reuse it for both comparisons.
  type_id <- a$type_id()
  if (type_id == Type$DICTIONARY) {
    a <- shared_ptr(`arrow::DictionaryArray`, xp)
  } else if (type_id == Type$STRUCT) {
    a <- shared_ptr(`arrow::StructArray`, xp)
  }
  a
}
Expand All @@ -126,11 +143,3 @@
array <- function(x, type = NULL) {
  # Build the array from `x` (with the optional `type`), then let dispatch()
  # choose the R6 wrapper.
  xp <- Array__from_vector(x, type)
  `arrow::Array`$dispatch(xp)
}

# R6 binding for arrow::DictionaryArray, extending `arrow::Array`.
# Both accessors pass the value returned by the DictionaryArray__* binding to
# `arrow::Array`$dispatch() so that it is wrapped in the matching R6 class.
`arrow::DictionaryArray` <- R6Class(
  "arrow::DictionaryArray",
  inherit = `arrow::Array`,
  public = list(
    indices = function() {
      `arrow::Array`$dispatch(DictionaryArray__indices(self))
    },
    dictionary = function() {
      `arrow::Array`$dispatch(DictionaryArray__dictionary(self))
    }
  )
)

20 changes: 20 additions & 0 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions r/src/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,4 +119,25 @@ std::shared_ptr<arrow::Array> DictionaryArray__dictionary(
return array->dictionary();
}

// Returns the child array at index `i` of the given struct array.
// [[arrow::export]]
std::shared_ptr<arrow::Array> StructArray__field(
    const std::shared_ptr<arrow::StructArray>& array, int i) {
  auto child = array->field(i);
  return child;
}

// Looks up a child array of the given struct array by field name.
// [[arrow::export]]
std::shared_ptr<arrow::Array> StructArray__GetFieldByName(
    const std::shared_ptr<arrow::StructArray>& array, const std::string& name) {
  auto child = array->GetFieldByName(name);
  return child;
}

// Flattens the struct array into a vector of arrays using the default memory
// pool. A non-OK status from Flatten() is handled by STOP_IF_NOT_OK.
// [[arrow::export]]
arrow::ArrayVector StructArray__Flatten(
    const std::shared_ptr<arrow::StructArray>& array) {
  // Size the result to one slot per field before handing it to Flatten().
  arrow::ArrayVector flattened(array->num_fields());
  auto pool = arrow::default_memory_pool();
  STOP_IF_NOT_OK(array->Flatten(pool, &flattened));
  return flattened;
}

#endif
62 changes: 62 additions & 0 deletions r/src/array__to_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,65 @@ class Converter_Dictionary : public Converter {
}
};

class Converter_Struct : public Converter {
public:
explicit Converter_Struct(const ArrayVector& arrays) : Converter(arrays), converters() {
auto first_array =
internal::checked_cast<arrow::StructArray*>(Converter::arrays_[0].get());
int nf = first_array->num_fields();
for (int i = 0; i < nf; i++) {
converters.push_back(Converter::Make({first_array->field(i)}));
}
}

SEXP Allocate(R_xlen_t n) const {
// allocate a data frame column to host each array
auto first_array =
internal::checked_cast<arrow::StructArray*>(Converter::arrays_[0].get());
auto type = first_array->struct_type();
int nf = first_array->num_fields();
Rcpp::List out(nf);
Rcpp::CharacterVector colnames(nf);
for (int i = 0; i < nf; i++) {
out[i] = converters[i]->Allocate(n);
colnames[i] = type->child(i)->name();
}
IntegerVector rn(2);
rn[0] = NA_INTEGER;
rn[1] = -n;
Rf_setAttrib(out, symbols::row_names, rn);
Rf_setAttrib(out, R_NamesSymbol, colnames);
Rf_setAttrib(out, R_ClassSymbol, Rf_mkString("data.frame"));
return out;
}

Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const {
int nf = converters.size();
for (int i = 0; i < nf; i++) {
STOP_IF_NOT_OK(converters[i]->Ingest_all_nulls(VECTOR_ELT(data, i), start, n));
}
return Status::OK();
}

Status Ingest_some_nulls(SEXP data, const std::shared_ptr<arrow::Array>& array,
R_xlen_t start, R_xlen_t n) const {
auto struct_array = internal::checked_cast<arrow::StructArray*>(array.get());
int nf = converters.size();
// Flatten() deals with merging of nulls
ArrayVector arrays(nf);
STOP_IF_NOT_OK(struct_array->Flatten(default_memory_pool(), &arrays));
for (int i = 0; i < nf; i++) {
STOP_IF_NOT_OK(
converters[i]->Ingest_some_nulls(VECTOR_ELT(data, i), arrays[i], start, n));
}

return Status::OK();
}

private:
std::vector<std::shared_ptr<Converter>> converters;
};

// Converts a millisecond count to seconds.
// The division is carried out in floating point so that sub-second precision
// is kept; dividing the integer first would truncate the result toward zero.
double ms_to_seconds(int64_t ms) { return static_cast<double>(ms) / 1000; }

class Converter_Date64 : public Converter {
Expand Down Expand Up @@ -599,6 +658,9 @@ std::shared_ptr<Converter> Converter::Make(const ArrayVector& arrays) {
case Type::DECIMAL:
return std::make_shared<arrow::r::Converter_Decimal>(arrays);

case Type::STRUCT:
return std::make_shared<arrow::r::Converter_Struct>(arrays);

default:
break;
}
Expand Down
84 changes: 84 additions & 0 deletions r/src/arrowExports.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion r/src/arrow_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ struct symbols {
static SEXP xp;
static SEXP dot_Internal;
static SEXP inspect;
static SEXP row_names;
};
} // namespace r
} // namespace arrow
Expand Down Expand Up @@ -172,9 +173,9 @@ inline std::shared_ptr<T> extract(SEXP x) {
#include <arrow/ipc/feather.h>
#include <arrow/ipc/reader.h>
#include <arrow/ipc/writer.h>
#include <arrow/json/reader.h>
#include <arrow/type.h>
#include <arrow/util/compression.h>
#include <arrow/json/reader.h>

RCPP_EXPOSED_ENUM_NODECL(arrow::Type::type)
RCPP_EXPOSED_ENUM_NODECL(arrow::DateUnit)
Expand Down
12 changes: 12 additions & 0 deletions r/src/datatype.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,4 +269,16 @@ bool DictionaryType__ordered(const std::shared_ptr<arrow::DictionaryType>& type)
return type->ordered();
}

// Returns the field of the given struct type that is named `name`.
// [[arrow::export]]
std::shared_ptr<arrow::Field> StructType__GetFieldByName(
    const std::shared_ptr<arrow::StructType>& type, const std::string& name) {
  auto field = type->GetFieldByName(name);
  return field;
}

// Returns the index of the field named `name` within the given struct type,
// exactly as reported by StructType::GetFieldIndex().
// [[arrow::export]]
int StructType__GetFieldIndex(const std::shared_ptr<arrow::StructType>& type,
                              const std::string& name) {
  const int index = type->GetFieldIndex(name);
  return index;
}

#endif
1 change: 1 addition & 0 deletions r/src/symbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ SEXP symbols::units = Rf_install("units");
// Static symbol members, each initialised from a single Rf_install() call.
SEXP symbols::xp = Rf_install(".:xp:.");
SEXP symbols::dot_Internal = Rf_install(".Internal");
SEXP symbols::inspect = Rf_install("inspect");
// Attribute name set on data.frame results built by the struct converter.
SEXP symbols::row_names = Rf_install("row.names");

void inspect(SEXP obj) {
Rcpp::Shield<SEXP> call_inspect(Rf_lang2(symbols::inspect, obj));
Expand Down
7 changes: 7 additions & 0 deletions r/tests/testthat/test-DataType.R
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,13 @@ test_that("struct type works as expected", {
x$children(),
list(field("x", int32()), field("y", boolean()))
)
expect_equal(x$GetFieldIndex("x"), 0L)
expect_equal(x$GetFieldIndex("y"), 1L)
expect_equal(x$GetFieldIndex("z"), -1L)

expect_equal(x$GetFieldByName("x"), field("x", int32()))
expect_equal(x$GetFieldByName("y"), field("y", boolean()))
expect_null(x$GetFieldByName("z"))
})

test_that("DictionaryType works as expected (ARROW-3355)", {
Expand Down
Loading