Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
2f835b3
R support for binary arrays
romainfrancois Jun 22, 2020
ba32ce3
added degenerate test case
romainfrancois Jun 22, 2020
34ac484
using Rf_isNull()
romainfrancois Jun 23, 2020
2bfa333
reserve before append
romainfrancois Jun 26, 2020
ad8e72e
... actually Reserve()
romainfrancois Jun 26, 2020
da6fb1a
Less return()
romainfrancois Jun 26, 2020
cd5e2c9
make vctrs::list_of() from List arrays to keep a ptype that matches t…
romainfrancois Jun 26, 2020
0a368aa
additional DataType types, currently not handled
romainfrancois Jun 26, 2020
77e9e46
+ large_binary()
romainfrancois Jun 26, 2020
140dc24
Converter_Binary becomes a template
romainfrancois Jun 26, 2020
0b3051f
BinaryVectorConverter becomes a template to handle both Binary and La…
romainfrancois Jun 26, 2020
a24b840
+ large_utf8()
romainfrancois Jun 26, 2020
9715530
string vector -> large string array
romainfrancois Jun 26, 2020
e934e9a
Large string -> character vector
romainfrancois Jun 26, 2020
d00ccc2
+ StringVectorConverter<> to remove special case for strings
romainfrancois Jun 26, 2020
e4653d6
large_list_of()
romainfrancois Jun 26, 2020
64f5b9f
LargeListArray support
romainfrancois Jun 26, 2020
a3385e4
Update r/tests/testthat/test-Array.R
romainfrancois Jun 26, 2020
249bf51
Update r/tests/testthat/test-Array.R
romainfrancois Jun 26, 2020
ff10327
add as= argument to expect_array_roundtrip()
romainfrancois Jun 27, 2020
db60114
(Repeatedly) convert to UTF-8; tests pass
nealrichardson Jul 1, 2020
3dc462c
Update vignette
nealrichardson Jul 1, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,9 @@ export(int16)
export(int32)
export(int64)
export(int8)
export(large_binary)
export(large_list_of)
export(large_utf8)
export(last_col)
export(list_of)
export(map_batches)
Expand Down
18 changes: 18 additions & 0 deletions r/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ Array <- R6Class("Array",
shared_ptr(StructArray, self$pointer())
} else if (type_id == Type$LIST) {
shared_ptr(ListArray, self$pointer())
} else if (type_id == Type$LARGE_LIST){
shared_ptr(LargeListArray, self$pointer())
} else {
self
}
Expand Down Expand Up @@ -230,6 +232,22 @@ ListArray <- R6Class("ListArray", inherit = Array,
)
)

#' @rdname array
#' @usage NULL
#' @format NULL
#' @export
LargeListArray <- R6Class("LargeListArray", inherit = Array,
  public = list(
    # Result of LargeListArray__values(), passed through Array$create().
    values = function() {
      Array$create(LargeListArray__values(self))
    },
    # Forwards index `i` to LargeListArray__value_length().
    value_length = function(i) {
      LargeListArray__value_length(self, i)
    },
    # Forwards index `i` to LargeListArray__value_offset().
    value_offset = function(i) {
      LargeListArray__value_offset(self, i)
    },
    # Offsets as returned by LargeListArray__raw_value_offsets().
    raw_value_offsets = function() {
      LargeListArray__raw_value_offsets(self)
    }
  ),
  active = list(
    # Result of LargeListArray__value_type(), passed through DataType$create().
    value_type = function() {
      DataType$create(LargeListArray__value_type(self))
    }
  )
)

#' @export
length.Array <- function(x) {
  # S3 length() method: delegate to the object's own length() method.
  x$length()
}

Expand Down
40 changes: 40 additions & 0 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion r/R/enums.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,13 @@ Type <- enum("Type::type",
SPARSE_UNION = 26L,
DENSE_UNION = 27L,
DICTIONARY = 28L,
MAP = 29L
MAP = 29L,
EXTENSION = 30L,
FIXED_SIZE_LIST = 31L,
DURATION = 32L,
LARGE_STRING = 33L,
LARGE_BINARY = 34L,
LARGE_LIST = 35L
)

#' @rdname enums
Expand Down
12 changes: 12 additions & 0 deletions r/R/list.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,15 @@ ListType <- R6Class("ListType",
#' @rdname data-type
#' @export
list_of <- function(type) {
  # Build the pointer with list__() and wrap it in the ListType R6 class.
  xp <- list__(type)
  shared_ptr(ListType, xp)
}

# R6 wrapper for the large list data type; both fields are read-only active
# bindings backed by the LargeListType__* bindings.
LargeListType <- R6Class("LargeListType",
  inherit = NestedType,
  active = list(
    # Result of LargeListType__value_field(), wrapped as a Field.
    value_field = function() {
      shared_ptr(Field, LargeListType__value_field(self))
    },
    # Result of LargeListType__value_type(), passed through DataType$create().
    value_type = function() {
      DataType$create(LargeListType__value_type(self))
    }
  )
)

#' @rdname data-type
#' @export
large_list_of <- function(type) {
  # Build the pointer with large_list__() and wrap it in LargeListType.
  xp <- large_list__(type)
  shared_ptr(LargeListType, xp)
}
22 changes: 20 additions & 2 deletions r/R/type.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ DataType <- R6Class("DataType",
FLOAT = float32(),
DOUBLE = float64(),
STRING = utf8(),
BINARY = stop("Type BINARY not implemented yet"),
BINARY = binary(),
DATE32 = date32(),
DATE64 = date64(),
TIMESTAMP = shared_ptr(Timestamp, self$pointer()),
Expand All @@ -73,7 +73,13 @@ DataType <- R6Class("DataType",
SPARSE_UNION = stop("Type SPARSE_UNION not implemented yet"),
DENSE_UNION = stop("Type DENSE_UNION not implemented yet"),
DICTIONARY = shared_ptr(DictionaryType, self$pointer()),
MAP = stop("Type MAP not implemented yet")
MAP = stop("Type MAP not implemented yet"),
EXTENSION = stop("Type EXTENSION not implemented yet"),
FIXED_SIZE_LIST = stop("Type FIXED_SIZE_LIST not implemented yet"),
DURATION = stop("Type DURATION not implemented yet"),
LARGE_STRING = large_utf8(),
LARGE_BINARY = large_binary(),
LARGE_LIST = shared_ptr(LargeListType, self$pointer())
)
}
),
Expand Down Expand Up @@ -141,8 +147,10 @@ Float32 <- R6Class("Float32", inherit = FixedWidthType)
Float64 <- R6Class("Float64", inherit = FixedWidthType)
Boolean <- R6Class("Boolean", inherit = FixedWidthType)
Utf8 <- R6Class("Utf8", inherit = DataType)
LargeUtf8 <- R6Class("LargeUtf8", inherit = DataType)
Binary <- R6Class("Binary", inherit = DataType)
FixedSizeBinary <- R6Class("FixedSizeBinary", inherit = FixedWidthType)
LargeBinary <- R6Class("LargeBinary", inherit = DataType)

DateType <- R6Class("DateType",
inherit = FixedWidthType,
Expand Down Expand Up @@ -286,6 +294,10 @@ bool <- boolean
#' @export
utf8 <- function() {
  # Wrap the pointer from Utf8__initialize() in the Utf8 R6 class.
  xp <- Utf8__initialize()
  shared_ptr(Utf8, xp)
}

#' @rdname data-type
#' @export
large_utf8 <- function() {
  # Wrap the pointer from LargeUtf8__initialize() in the LargeUtf8 R6 class.
  xp <- LargeUtf8__initialize()
  shared_ptr(LargeUtf8, xp)
}

#' @rdname data-type
#' @export
binary <- function(byte_width = NULL) {
Expand All @@ -296,6 +308,12 @@ binary <- function(byte_width = NULL) {
}
}

#' @rdname data-type
#' @export
large_binary <- function() {
  # Wrap the pointer from LargeBinary__initialize() in the LargeBinary R6 class.
  xp <- LargeBinary__initialize()
  shared_ptr(LargeBinary, xp)
}

# `string` is an alias bound to the same function object as `utf8`.
#' @rdname data-type
#' @export
string <- utf8
Expand Down
9 changes: 9 additions & 0 deletions r/man/data-type.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion r/man/enums.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions r/src/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,29 +206,60 @@ std::shared_ptr<arrow::DataType> ListArray__value_type(
return array->value_type();
}

// Forwards to arrow::LargeListArray::value_type().
// [[arrow::export]]
std::shared_ptr<arrow::DataType> LargeListArray__value_type(
    const std::shared_ptr<arrow::LargeListArray>& array) {
  std::shared_ptr<arrow::DataType> type = array->value_type();
  return type;
}

// Forwards to arrow::ListArray::values().
// [[arrow::export]]
std::shared_ptr<arrow::Array> ListArray__values(
    const std::shared_ptr<arrow::ListArray>& array) {
  std::shared_ptr<arrow::Array> child = array->values();
  return child;
}

// Forwards to arrow::LargeListArray::values().
// [[arrow::export]]
std::shared_ptr<arrow::Array> LargeListArray__values(
    const std::shared_ptr<arrow::LargeListArray>& array) {
  std::shared_ptr<arrow::Array> child = array->values();
  return child;
}

// Forwards index `i` to arrow::ListArray::value_length().
// [[arrow::export]]
int32_t ListArray__value_length(
    const std::shared_ptr<arrow::ListArray>& array, int64_t i) {
  const int32_t length = array->value_length(i);
  return length;
}

// Forwards index `i` to arrow::LargeListArray::value_length().
// [[arrow::export]]
int64_t LargeListArray__value_length(
    const std::shared_ptr<arrow::LargeListArray>& array, int64_t i) {
  const int64_t length = array->value_length(i);
  return length;
}

// Forwards index `i` to arrow::ListArray::value_offset().
// [[arrow::export]]
int32_t ListArray__value_offset(
    const std::shared_ptr<arrow::ListArray>& array, int64_t i) {
  const int32_t offset = array->value_offset(i);
  return offset;
}

// Forwards index `i` to arrow::LargeListArray::value_offset().
// [[arrow::export]]
int64_t LargeListArray__value_offset(
    const std::shared_ptr<arrow::LargeListArray>& array, int64_t i) {
  const int64_t offset = array->value_offset(i);
  return offset;
}

// Copies the first length() entries of raw_value_offsets() into an R integer
// vector.
// [[arrow::export]]
Rcpp::IntegerVector ListArray__raw_value_offsets(
    const std::shared_ptr<arrow::ListArray>& array) {
  auto first = array->raw_value_offsets();
  auto last = first + array->length();
  return Rcpp::IntegerVector(first, last);
}

// Copies the first length() entries of raw_value_offsets() into an R integer
// vector. This file exposes large-list offsets as int64_t, while an R integer
// vector stores 32-bit values, so any offset above INT32_MAX raises an R error
// instead of being silently narrowed during the copy.
// NOTE(review): like ListArray__raw_value_offsets, only length() entries are
// copied -- confirm that leaving out a trailing end offset is intended.
// [[arrow::export]]
Rcpp::IntegerVector LargeListArray__raw_value_offsets(
    const std::shared_ptr<arrow::LargeListArray>& array) {
  auto offsets = array->raw_value_offsets();
  const int64_t n = array->length();
  // Validate every copied offset before narrowing; the loop does not run (and
  // the pointer is never dereferenced) for an empty array.
  for (int64_t i = 0; i < n; i++) {
    if (offsets[i] > INT32_MAX) {
      Rcpp::stop(
          "LargeListArray offset is too large to be represented as an R integer");
    }
  }
  return Rcpp::IntegerVector(offsets, offsets + n);
}

#endif
Loading