Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ S3method(CompressedOutputStream,fs_path)
S3method(FeatherTableReader,"arrow::io::RandomAccessFile")
S3method(FeatherTableReader,"arrow::ipc::feather::TableReader")
S3method(FeatherTableReader,character)
S3method(FeatherTableReader,default)
S3method(FeatherTableReader,fs_path)
S3method(FeatherTableReader,raw)
S3method(FeatherTableWriter,"arrow::io::OutputStream")
S3method(FixedSizeBufferWriter,"arrow::Buffer")
S3method(FixedSizeBufferWriter,default)
Expand Down Expand Up @@ -65,6 +65,7 @@ S3method(names,"arrow::RecordBatch")
S3method(parquet_file_reader,"arrow::io::RandomAccessFile")
S3method(parquet_file_reader,character)
S3method(parquet_file_reader,fs_path)
S3method(parquet_file_reader,raw)
S3method(print,"arrow-enum")
S3method(read_message,"arrow::io::InputStream")
S3method(read_message,"arrow::ipc::MessageReader")
Expand Down
1 change: 1 addition & 0 deletions r/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# arrow 0.14.1.9000

* `read_csv_arrow()` supports more parsing options, including `col_names` and `skip`
* `read_parquet()` and `read_feather()` can ingest data from a `raw` vector ([ARROW-6278](https://issues.apache.org/jira/browse/ARROW-6278))

# arrow 0.14.1

Expand Down
10 changes: 5 additions & 5 deletions r/R/feather.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,6 @@ FeatherTableReader <- function(file, mmap = TRUE, ...){
UseMethod("FeatherTableReader")
}

#' @export
FeatherTableReader.default <- function(file, mmap = TRUE, ...) {
  # Fallback method: reached when `file` has no dedicated
  # FeatherTableReader() method, so a reader cannot be built from it.
  msg <- "unsupported"
  stop(msg)
}

#' @export
FeatherTableReader.character <- function(file, mmap = TRUE, ...) {
FeatherTableReader(fs::path_abs(file), mmap = mmap, ...)
Expand All @@ -157,6 +152,11 @@ FeatherTableReader.fs_path <- function(file, mmap = TRUE, ...) {
FeatherTableReader(stream)
}

#' @export
FeatherTableReader.raw <- function(file, mmap = TRUE, ...) {
  # Wrap the raw vector with BufferReader(), then hand the result back to the
  # FeatherTableReader() generic so the matching method opens it.
  buffer_stream <- BufferReader(file)
  FeatherTableReader(buffer_stream, mmap = mmap, ...)
}

#' @export
`FeatherTableReader.arrow::io::RandomAccessFile` <- function(file, mmap = TRUE, ...){
unique_ptr(`arrow::ipc::feather::TableReader`, ipc___feather___TableReader__Open(file))
Expand Down
5 changes: 5 additions & 0 deletions r/R/parquet.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ parquet_file_reader.character <- function(file, props = parquet_arrow_reader_pro
parquet_file_reader(fs::path_abs(file), props = parquet_arrow_reader_properties(), memory_map = memory_map, ...)
}

#' @export
parquet_file_reader.raw <- function(file, props = parquet_arrow_reader_properties(), memory_map = TRUE, ...) {
  # Wrap the raw vector with BufferReader() and re-dispatch on the result.
  # Forward the caller's `props` as given: rebuilding the defaults here would
  # silently discard any custom reader properties supplied for raw input.
  parquet_file_reader(BufferReader(file), props = props, memory_map = memory_map, ...)
}

#' Read a Parquet file
#'
#' '[Parquet](https://parquet.apache.org/)' is a columnar storage file format.
Expand Down
65 changes: 21 additions & 44 deletions r/tests/testthat/test-feather.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@

context("Feather")

test_that("feather read/write round trip", {
tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])
feather_file <- tempfile()
tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])

tf1 <- tempfile()
write_feather(tib, tf1)
expect_true(fs::file_exists(tf1))
# Writes `tib` to `feather_file` and checks that the file now exists on disk.
# Other tests in this file read from `feather_file`, so this one has to run
# before them.
test_that("Write a feather file", {
write_feather(tib, feather_file)
expect_true(fs::file_exists(feather_file))
})

test_that("feather read/write round trip", {
tf2 <- fs::path_abs(tempfile())
write_feather(tib, tf2)
expect_true(fs::file_exists(tf2))
Expand All @@ -34,7 +36,7 @@ test_that("feather read/write round trip", {
stream$close()
expect_true(fs::file_exists(tf3))

tab1 <- read_feather(tf1)
tab1 <- read_feather(feather_file)
expect_is(tab1, "data.frame")

tab2 <- read_feather(tf2)
Expand All @@ -57,76 +59,51 @@ test_that("feather read/write round trip", {
expect_equal(tib, tab4)
expect_equal(tib, tab5)

unlink(tf1)
unlink(tf2)
unlink(tf3)
})

# Selecting columns by name: the result must be a data.frame whose `x` and `y`
# columns match the source tibble.
# NOTE(review): this span appears to show removed (tf1-based) and added
# (feather_file-based) diff lines side by side; confirm against the real file.
test_that("feather handles col_select = <names>", {
tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])

tf1 <- tempfile()
write_feather(tib, tf1)
expect_true(fs::file_exists(tf1))

tab1 <- read_feather(tf1, col_select = c("x", "y"))
tab1 <- read_feather(feather_file, col_select = c("x", "y"))
expect_is(tab1, "data.frame")

expect_equal(tib$x, tab1$x)
expect_equal(tib$y, tab1$y)

unlink(tf1)
})

# Selecting columns by integer position (1:2): the result must be a data.frame
# whose `x` and `y` columns match the source tibble.
# NOTE(review): this span appears to show removed (tf1-based) and added
# (feather_file-based) diff lines side by side; confirm against the real file.
test_that("feather handles col_select = <integer>", {
tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])

tf1 <- tempfile()
write_feather(tib, tf1)
expect_true(fs::file_exists(tf1))

tab1 <- read_feather(tf1, col_select = 1:2)
tab1 <- read_feather(feather_file, col_select = 1:2)
expect_is(tab1, "data.frame")

expect_equal(tib$x, tab1$x)
expect_equal(tib$y, tab1$y)
unlink(tf1)
})

# Selecting columns with tidyselect helpers. Covers everything(), starts_with(),
# a combination of starts_with() and contains(), and negative selection (-z);
# each result is compared against the matching subset of the source tibble.
# NOTE(review): this span appears to show removed (tf1-based) and added
# (feather_file-based) diff lines side by side; confirm against the real file.
test_that("feather handles col_select = <tidyselect helper>", {
tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])

tf1 <- tempfile()
write_feather(tib, tf1)
expect_true(fs::file_exists(tf1))

tab1 <- read_feather(tf1, col_select = everything())
tab1 <- read_feather(feather_file, col_select = everything())
expect_identical(tib, tab1)

tab2 <- read_feather(tf1, col_select = starts_with("x"))
tab2 <- read_feather(feather_file, col_select = starts_with("x"))
expect_identical(tab2, tib[, "x", drop = FALSE])

tab3 <- read_feather(tf1, col_select = c(starts_with("x"), contains("y")))
tab3 <- read_feather(feather_file, col_select = c(starts_with("x"), contains("y")))
expect_identical(tab3, tib[, c("x", "y"), drop = FALSE])

tab4 <- read_feather(tf1, col_select = -z)
tab4 <- read_feather(feather_file, col_select = -z)
expect_identical(tab4, tib[, c("x", "y"), drop = FALSE])

unlink(tf1)
})

# Reading with as_tibble = FALSE: the result must be an "arrow::Table" that
# converts back to the source tibble via as.data.frame().
# NOTE(review): the description string repeats that of another test in this
# file; consider a name that mentions as_tibble = FALSE.
# NOTE(review): this span appears to show removed (tf1-based) and added
# (feather_file-based) diff lines side by side; confirm against the real file.
test_that("feather read/write round trip", {
tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])

tf1 <- tempfile()
write_feather(tib, tf1)
expect_true(fs::file_exists(tf1))

tab1 <- read_feather(tf1, as_tibble = FALSE)
tab1 <- read_feather(feather_file, as_tibble = FALSE)
expect_is(tab1, "arrow::Table")

expect_equal(tib, as.data.frame(tab1))
unlink(tf1)
})

# Reads the feather file into a raw vector and checks that read_feather()
# accepts it and returns the same data that was written.
test_that("Read feather from raw vector", {
  # Size the read from the file itself; a fixed byte count would silently
  # truncate any file larger than that count.
  test_raw <- readBin(feather_file, what = "raw", n = file.size(feather_file))
  df <- read_feather(test_raw)
  expect_is(df, "data.frame")
  # Check the contents as well as the class.
  expect_equal(tib, df)
})

unlink(feather_file)
6 changes: 6 additions & 0 deletions r/tests/testthat/test-parquet.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,9 @@ test_that("read_parquet() supports col_select", {
df <- read_parquet(pq_file, col_select = starts_with("c"))
expect_equal(names(df), c("carat", "cut", "color", "clarity"))
})

# Reads the parquet file into a raw vector and checks that read_parquet()
# accepts it and returns a result with the expected dimensions.
test_that("read_parquet() with raw data", {
  # Size the read from the file itself; a fixed byte count would silently
  # truncate any file larger than that count.
  test_raw <- readBin(pq_file, what = "raw", n = file.size(pq_file))
  df <- read_parquet(test_raw)
  expect_identical(dim(df), c(10L, 11L))
})