From 93468aa641fdb98ac56b94ec312845b17b20d1a8 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 8 Jul 2022 09:45:42 -0400 Subject: [PATCH 1/2] Remove long-deprecated functions and add tests to check deprecation warnings for more recent ones --- r/DESCRIPTION | 1 - r/NAMESPACE | 4 +-- r/NEWS.md | 11 ++++--- r/R/dataset-scan.R | 18 ---------- r/R/deprecated.R | 40 ---------------------- r/R/dplyr-union.R | 2 +- r/man/FileSystem.Rd | 1 + r/man/Scanner.Rd | 3 -- r/man/arrow_info.Rd | 3 ++ r/man/read_ipc_stream.Rd | 11 +++---- r/man/write_ipc_stream.Rd | 7 ++-- r/tests/testthat/test-Table.R | 53 ++++++------------------------ r/tests/testthat/test-arrow-info.R | 4 +++ r/tests/testthat/test-dataset.R | 18 ---------- r/tests/testthat/test-type.R | 9 +++++ 15 files changed, 42 insertions(+), 143 deletions(-) delete mode 100644 r/R/deprecated.R diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 5385877696e..e2670572af3 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -88,7 +88,6 @@ Collate: 'dataset-partition.R' 'dataset-scan.R' 'dataset-write.R' - 'deprecated.R' 'dictionary.R' 'dplyr-arrange.R' 'dplyr-collect.R' diff --git a/r/NAMESPACE b/r/NAMESPACE index e98cdd51fb7..5762df9eb0c 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -195,6 +195,7 @@ export(FileType) export(FixedSizeListArray) export(FixedSizeListType) export(FragmentScanOptions) +export(GcsFileSystem) export(HivePartitioning) export(HivePartitioningFactory) export(InMemoryDataset) @@ -251,6 +252,7 @@ export(arrow_available) export(arrow_info) export(arrow_table) export(arrow_with_dataset) +export(arrow_with_gcs) export(arrow_with_json) export(arrow_with_parquet) export(arrow_with_s3) @@ -330,7 +332,6 @@ export(null) export(num_range) export(one_of) export(open_dataset) -export(read_arrow) export(read_csv_arrow) export(read_delim_arrow) export(read_feather) @@ -366,7 +367,6 @@ export(utf8) export(value_counts) export(vctrs_extension_array) export(vctrs_extension_type) -export(write_arrow) 
export(write_csv_arrow) export(write_dataset) export(write_feather) diff --git a/r/NEWS.md b/r/NEWS.md index d88be229640..45a963ca48e 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -22,6 +22,7 @@ * `lubridate::parse_date_time()` datetime parser: * `orders` with year, month, day, hours, minutes, and seconds components are supported. * the `orders` argument in the Arrow binding works as follows: `orders` are transformed into `formats` which subsequently get applied in turn. There is no `select_formats` parameter and no inference takes place (like is the case in `lubridate::parse_date_time()`). +* `read_arrow()` and `write_arrow()`, deprecated since 1.0.0 (July 2020), have been removed. Use the `read/write_feather()` and `read/write_ipc_stream()` functions depending on whether you're working with the Arrow IPC file or stream format, respectively. # arrow 8.0.0 @@ -50,7 +51,7 @@ ## Enhancements to date and time support -* `read_csv_arrow()`'s readr-style type `T` is mapped to `timestamp(unit = "ns")` +* `read_csv_arrow()`'s readr-style type `T` is mapped to `timestamp(unit = "ns")` instead of `timestamp(unit = "s")`. * For Arrow dplyr queries, added additional `{lubridate}` features and fixes: * New component extraction functions: @@ -86,14 +87,14 @@ record batches, arrays, chunked arrays, record batch readers, schemas, and data types. This allows other packages to define custom conversions from their types to Arrow objects, including extension arrays. -* Custom [extension types and arrays](https://arrow.apache.org/docs/format/Columnar.html#extension-types) +* Custom [extension types and arrays](https://arrow.apache.org/docs/format/Columnar.html#extension-types) can be created and registered, allowing other packages to define their own array types. Extension arrays wrap regular Arrow array types and provide customized behavior and/or storage. See description and an example with `?new_extension_type`. 
-* Implemented a generic extension type and as_arrow_array() methods for all objects where - `vctrs::vec_is()` returns TRUE (i.e., any object that can be used as a column in a - `tibble::tibble()`), provided that the underlying `vctrs::vec_data()` can be converted +* Implemented a generic extension type and as_arrow_array() methods for all objects where + `vctrs::vec_is()` returns TRUE (i.e., any object that can be used as a column in a + `tibble::tibble()`), provided that the underlying `vctrs::vec_data()` can be converted to an Arrow Array. ## Concatenation Support diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R index a8da1fb60d5..cca92b676fe 100644 --- a/r/R/dataset-scan.R +++ b/r/R/dataset-scan.R @@ -33,8 +33,6 @@ #' * `filter`: A `Expression` to filter the scanned rows by, or `TRUE` (default) #' to keep all rows. #' * `use_threads`: logical: should scanning use multithreading? Default `TRUE` -#' * `use_async`: logical: deprecated, this field no longer has any effect on -#' behavior. #' * `...`: Additional arguments, currently ignored #' @section Methods: #' `ScannerBuilder` has the following methods: @@ -45,7 +43,6 @@ #' - `$UseThreads(threads)`: logical: should the scan use multithreading? #' The method's default input is `TRUE`, but you must call the method to enable #' multithreading because the scanner default is `FALSE`. -#' - `$UseAsync(use_async)`: logical: deprecated, has no effect #' - `$BatchSize(batch_size)`: integer: Maximum row count of scanned record #' batches, default is 32K. If scanned record batches are overflowing memory #' then this method can be called to reduce their size. @@ -73,19 +70,11 @@ Scanner$create <- function(dataset, projection = NULL, filter = TRUE, use_threads = option_use_threads(), - use_async = NULL, batch_size = NULL, fragment_scan_options = NULL, ...) 
{ stop_if_no_datasets() - if (!is.null(use_async)) { - .Deprecated(msg = paste( - "The parameter 'use_async' is deprecated", - "and will be removed in a future release." - )) - } - if (inherits(dataset, "arrow_dplyr_query")) { if (is_collapsed(dataset)) { # TODO: Is there a way to get a RecordBatchReader rather than evaluating? @@ -258,13 +247,6 @@ ScannerBuilder <- R6Class("ScannerBuilder", dataset___ScannerBuilder__UseThreads(self, threads) self }, - UseAsync = function(use_async = TRUE) { - .Deprecated(msg = paste( - "The function 'UseAsync' is deprecated and", - "will be removed in a future release." - )) - self - }, BatchSize = function(batch_size) { dataset___ScannerBuilder__BatchSize(self, batch_size) self diff --git a/r/R/deprecated.R b/r/R/deprecated.R deleted file mode 100644 index e8848c4aa1f..00000000000 --- a/r/R/deprecated.R +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#' @rdname read_ipc_stream -#' @export -read_arrow <- function(file, ...) { - .Deprecated(msg = "Use 'read_ipc_stream' or 'read_feather' instead.") - if (inherits(file, "raw")) { - read_ipc_stream(file, ...) - } else { - read_feather(file, ...) 
- } -} - -#' @rdname write_ipc_stream -#' @export -write_arrow <- function(x, sink, ...) { - .Deprecated(msg = "Use 'write_ipc_stream' or 'write_feather' instead.") - if (inherits(sink, "raw")) { - # HACK for sparklyr - # Note that this returns a new R raw vector, not the one passed as `sink` - write_to_raw(x) - } else { - write_feather(x, sink, ...) - } -} diff --git a/r/R/dplyr-union.R b/r/R/dplyr-union.R index 3252d4cecf0..2c0120190ea 100644 --- a/r/R/dplyr-union.R +++ b/r/R/dplyr-union.R @@ -21,7 +21,7 @@ union.arrow_dplyr_query <- function(x, y, ...) { x <- as_adq(x) y <- as_adq(y) - distinct(union_all(x, y)) + dplyr::distinct(dplyr::union_all(x, y)) } union.Dataset <- union.ArrowTabular <- union.RecordBatchReader <- union.arrow_dplyr_query diff --git a/r/man/FileSystem.Rd b/r/man/FileSystem.Rd index 1ed01644650..41d9e925140 100644 --- a/r/man/FileSystem.Rd +++ b/r/man/FileSystem.Rd @@ -5,6 +5,7 @@ \alias{FileSystem} \alias{LocalFileSystem} \alias{S3FileSystem} +\alias{GcsFileSystem} \alias{SubTreeFileSystem} \title{FileSystem classes} \description{ diff --git a/r/man/Scanner.Rd b/r/man/Scanner.Rd index d37383e1653..8f3d708c4ea 100644 --- a/r/man/Scanner.Rd +++ b/r/man/Scanner.Rd @@ -21,8 +21,6 @@ named list of expressions \item \code{filter}: A \code{Expression} to filter the scanned rows by, or \code{TRUE} (default) to keep all rows. \item \code{use_threads}: logical: should scanning use multithreading? Default \code{TRUE} -\item \code{use_async}: logical: deprecated, this field no longer has any effect on -behavior. \item \code{...}: Additional arguments, currently ignored } } @@ -37,7 +35,6 @@ by \code{cols}, a character vector of column names \item \verb{$UseThreads(threads)}: logical: should the scan use multithreading? The method's default input is \code{TRUE}, but you must call the method to enable multithreading because the scanner default is \code{FALSE}. 
-\item \verb{$UseAsync(use_async)}: logical: deprecated, has no effect \item \verb{$BatchSize(batch_size)}: integer: Maximum row count of scanned record batches, default is 32K. If scanned record batches are overflowing memory then this method can be called to reduce their size. diff --git a/r/man/arrow_info.Rd b/r/man/arrow_info.Rd index dcf13b0866a..5c3bed4aa27 100644 --- a/r/man/arrow_info.Rd +++ b/r/man/arrow_info.Rd @@ -7,6 +7,7 @@ \alias{arrow_with_substrait} \alias{arrow_with_parquet} \alias{arrow_with_s3} +\alias{arrow_with_gcs} \alias{arrow_with_json} \title{Report information on the package's capabilities} \usage{ @@ -22,6 +23,8 @@ arrow_with_parquet() arrow_with_s3() +arrow_with_gcs() + arrow_with_json() } \value{ diff --git a/r/man/read_ipc_stream.Rd b/r/man/read_ipc_stream.Rd index 4cd1e4757ed..567ee9882be 100644 --- a/r/man/read_ipc_stream.Rd +++ b/r/man/read_ipc_stream.Rd @@ -1,12 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/deprecated.R, R/ipc-stream.R -\name{read_arrow} -\alias{read_arrow} +% Please edit documentation in R/ipc-stream.R +\name{read_ipc_stream} \alias{read_ipc_stream} \title{Read Arrow IPC stream format} \usage{ -read_arrow(file, ...) - read_ipc_stream(file, as_data_frame = TRUE, ...) } \arguments{ @@ -16,10 +13,10 @@ If a file name or URI, an Arrow \link{InputStream} will be opened and closed when finished. 
If an input stream is provided, it will be left open.} -\item{...}{extra parameters passed to \code{read_feather()}.} - \item{as_data_frame}{Should the function return a \code{data.frame} (default) or an Arrow \link{Table}?} + +\item{...}{extra parameters passed to \code{read_feather()}.} } \value{ A \code{data.frame} if \code{as_data_frame} is \code{TRUE} (the default), or an diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd index bcc805b5438..60c31977322 100644 --- a/r/man/write_ipc_stream.Rd +++ b/r/man/write_ipc_stream.Rd @@ -1,12 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/deprecated.R, R/ipc-stream.R -\name{write_arrow} -\alias{write_arrow} +% Please edit documentation in R/ipc-stream.R +\name{write_ipc_stream} \alias{write_ipc_stream} \title{Write Arrow IPC stream format} \usage{ -write_arrow(x, sink, ...) - write_ipc_stream(x, sink, ...) } \arguments{ diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index 791e3ce2988..769f4d83297 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -15,49 +15,6 @@ # specific language governing permissions and limitations # under the License. 
-# Common fixtures used in many tests -tbl <- tibble::tibble( - int = 1:10, - dbl = as.numeric(1:10), - lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), - chr = letters[1:10], - fct = factor(letters[1:10]) -) -tab <- Table$create(tbl) - -test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { - tbl <- tibble::tibble( - int = 1:10, dbl = as.numeric(1:10), - lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), - chr = letters[1:10] - ) - tab <- Table$create(!!!tbl) - - tf <- tempfile() - on.exit(unlink(tf)) - expect_deprecated( - write_arrow(tab, tf), - "write_feather" - ) - - tab1 <- read_feather(tf, as_data_frame = FALSE) - tab2 <- read_feather(normalizePath(tf), as_data_frame = FALSE) - - readable_file <- ReadableFile$create(tf) - expect_deprecated( - tab3 <- read_arrow(readable_file, as_data_frame = FALSE), - "read_feather" - ) - readable_file$close() - - mmap_file <- mmap_open(tf) - mmap_file$close() - - expect_equal(tab, tab1) - expect_equal(tab, tab2) - expect_equal(tab, tab3) -}) - test_that("Table cast (ARROW-3741)", { tab <- Table$create(x = 1:10, y = 1:10) @@ -98,6 +55,16 @@ test_that("Table $column and $field", { expect_error(tab$field("one")) }) +# Common fixtures used in some of the following tests +tbl <- tibble::tibble( + int = 1:10, + dbl = as.numeric(1:10), + lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), + chr = letters[1:10], + fct = factor(letters[1:10]) +) +tab <- Table$create(tbl) + test_that("[, [[, $ for Table", { expect_identical(names(tab), names(tbl)) diff --git a/r/tests/testthat/test-arrow-info.R b/r/tests/testthat/test-arrow-info.R index 9eac6081486..addd3d740c2 100644 --- a/r/tests/testthat/test-arrow-info.R +++ b/r/tests/testthat/test-arrow-info.R @@ -21,3 +21,7 @@ test_that("arrow_info()", { options(arrow.foo = FALSE) expect_output(print(arrow_info()), "arrow.foo") }) + +test_that("arrow_available() is deprecated", { + expect_deprecated(arrow_available(), "always") +}) diff --git 
a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 02880314c25..8fb9f32c2ad 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -798,24 +798,6 @@ test_that("Scanner$ScanBatches", { batches <- ds$NewScan()$Finish()$ScanBatches() table <- Table$create(!!!batches) expect_equal(as.data.frame(table), rbind(df1, df2)) - - expect_deprecated(ds$NewScan()$UseAsync(TRUE), paste( - "The function", - "'UseAsync' is deprecated and will be removed in a future release." - )) - expect_deprecated(ds$NewScan()$UseAsync(FALSE), paste( - "The function", - "'UseAsync' is deprecated and will be removed in a future release." - )) - - expect_deprecated(Scanner$create(ds, use_async = TRUE), paste( - "The parameter 'use_async' is deprecated and will be removed in a future", - "release." - )) - expect_deprecated(Scanner$create(ds, use_async = FALSE), paste( - "The parameter 'use_async' is deprecated and will be removed in a future", - "release." - )) }) test_that("Scanner$ToRecordBatchReader()", { diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R index 46da8c9f8c5..d7c6da0792c 100644 --- a/r/tests/testthat/test-type.R +++ b/r/tests/testthat/test-type.R @@ -284,3 +284,12 @@ test_that("infer_type() infers type for vctrs", { vctrs_extension_type(vec[integer(0)]) ) }) + +test_that("type() is deprecated", { + a <- Array$create(1:10) + expect_deprecated( + a_type <- type(a), + "infer_type" + ) + expect_equal(a_type, a$type) +}) From e431b3b2b440096faad26002f93376f40a1c64ee Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 8 Jul 2022 11:08:43 -0400 Subject: [PATCH 2/2] Doc with latest roxygen2 --- r/DESCRIPTION | 2 +- r/man/ArrayData.Rd | 6 ++++-- r/man/Scalar.Rd | 6 ++++-- r/man/array.Rd | 6 ++++-- r/man/arrow-package.Rd | 2 +- 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index e2670572af3..2cbbec054a7 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -40,7 +40,7 
@@ Imports: utils, vctrs Roxygen: list(markdown = TRUE, r6 = FALSE, load = "source") -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 Config/testthat/edition: 3 VignetteBuilder: knitr Suggests: diff --git a/r/man/ArrayData.Rd b/r/man/ArrayData.Rd index 383ab317d1e..2e27c6cfca5 100644 --- a/r/man/ArrayData.Rd +++ b/r/man/ArrayData.Rd @@ -9,14 +9,16 @@ The \code{ArrayData} class allows you to get and inspect the data inside an \code{arrow::Array}. } \section{Usage}{ -\preformatted{data <- Array$create(x)$data() + + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{data <- Array$create(x)$data() data$type data$length data$null_count data$offset data$buffers -} +}\if{html}{\out{</div>
}} } \section{Methods}{ diff --git a/r/man/Scalar.Rd b/r/man/Scalar.Rd index d814c623372..e9eac70776b 100644 --- a/r/man/Scalar.Rd +++ b/r/man/Scalar.Rd @@ -17,12 +17,14 @@ The \code{Scalar$create()} factory method instantiates a \code{Scalar} and takes } \section{Usage}{ -\preformatted{a <- Scalar$create(x) + + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{a <- Scalar$create(x) length(a) print(a) a == a -} +}\if{html}{\out{</div>
}} } \section{Methods}{ diff --git a/r/man/array.Rd b/r/man/array.Rd index 371c53ac87a..5a4bc40d95e 100644 --- a/r/man/array.Rd +++ b/r/man/array.Rd @@ -41,12 +41,14 @@ but not limited to strings only) } \section{Usage}{ -\preformatted{a <- Array$create(x) + + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{a <- Array$create(x) length(a) print(a) a == a -} +}\if{html}{\out{</div>
}} } \section{Methods}{ diff --git a/r/man/arrow-package.Rd b/r/man/arrow-package.Rd index 2a0143d02e5..e1b6808f6bf 100644 --- a/r/man/arrow-package.Rd +++ b/r/man/arrow-package.Rd @@ -6,7 +6,7 @@ \alias{arrow-package} \title{arrow: Integration to 'Apache' 'Arrow'} \description{ -'Apache' 'Arrow' is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. This package provides an interface to the 'Arrow C++' library. +'Apache' 'Arrow' \url{https://arrow.apache.org/} is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. This package provides an interface to the 'Arrow C++' library. } \seealso{ Useful links: