diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 5385877696e..2cbbec054a7 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -40,7 +40,7 @@ Imports:
utils,
vctrs
Roxygen: list(markdown = TRUE, r6 = FALSE, load = "source")
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.0
Config/testthat/edition: 3
VignetteBuilder: knitr
Suggests:
@@ -88,7 +88,6 @@ Collate:
'dataset-partition.R'
'dataset-scan.R'
'dataset-write.R'
- 'deprecated.R'
'dictionary.R'
'dplyr-arrange.R'
'dplyr-collect.R'
diff --git a/r/NAMESPACE b/r/NAMESPACE
index e98cdd51fb7..5762df9eb0c 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -195,6 +195,7 @@ export(FileType)
export(FixedSizeListArray)
export(FixedSizeListType)
export(FragmentScanOptions)
+export(GcsFileSystem)
export(HivePartitioning)
export(HivePartitioningFactory)
export(InMemoryDataset)
@@ -251,6 +252,7 @@ export(arrow_available)
export(arrow_info)
export(arrow_table)
export(arrow_with_dataset)
+export(arrow_with_gcs)
export(arrow_with_json)
export(arrow_with_parquet)
export(arrow_with_s3)
@@ -330,7 +332,6 @@ export(null)
export(num_range)
export(one_of)
export(open_dataset)
-export(read_arrow)
export(read_csv_arrow)
export(read_delim_arrow)
export(read_feather)
@@ -366,7 +367,6 @@ export(utf8)
export(value_counts)
export(vctrs_extension_array)
export(vctrs_extension_type)
-export(write_arrow)
export(write_csv_arrow)
export(write_dataset)
export(write_feather)
diff --git a/r/NEWS.md b/r/NEWS.md
index d88be229640..45a963ca48e 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -22,6 +22,7 @@
* `lubridate::parse_date_time()` datetime parser:
* `orders` with year, month, day, hours, minutes, and seconds components are supported.
* the `orders` argument in the Arrow binding works as follows: `orders` are transformed into `formats` which subsequently get applied in turn. There is no `select_formats` parameter and no inference takes place (like is the case in `lubridate::parse_date_time()`).
+* `read_arrow()` and `write_arrow()`, deprecated since 1.0.0 (July 2020), have been removed. Use `read_feather()`/`write_feather()` for the Arrow IPC file format, or `read_ipc_stream()`/`write_ipc_stream()` for the IPC stream format.
# arrow 8.0.0
@@ -50,7 +51,7 @@
## Enhancements to date and time support
-* `read_csv_arrow()`'s readr-style type `T` is mapped to `timestamp(unit = "ns")`
+* `read_csv_arrow()`'s readr-style type `T` is mapped to `timestamp(unit = "ns")`
instead of `timestamp(unit = "s")`.
* For Arrow dplyr queries, added additional `{lubridate}` features and fixes:
* New component extraction functions:
@@ -86,14 +87,14 @@
record batches, arrays, chunked arrays, record batch readers, schemas, and
data types. This allows other packages to define custom conversions from their
types to Arrow objects, including extension arrays.
-* Custom [extension types and arrays](https://arrow.apache.org/docs/format/Columnar.html#extension-types)
+* Custom [extension types and arrays](https://arrow.apache.org/docs/format/Columnar.html#extension-types)
can be created and registered, allowing other packages to
define their own array types. Extension arrays wrap regular Arrow array types and
provide customized behavior and/or storage. See description and an example with
`?new_extension_type`.
-* Implemented a generic extension type and as_arrow_array() methods for all objects where
- `vctrs::vec_is()` returns TRUE (i.e., any object that can be used as a column in a
- `tibble::tibble()`), provided that the underlying `vctrs::vec_data()` can be converted
+* Implemented a generic extension type and as_arrow_array() methods for all objects where
+ `vctrs::vec_is()` returns TRUE (i.e., any object that can be used as a column in a
+ `tibble::tibble()`), provided that the underlying `vctrs::vec_data()` can be converted
to an Arrow Array.
## Concatenation Support
diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R
index a8da1fb60d5..cca92b676fe 100644
--- a/r/R/dataset-scan.R
+++ b/r/R/dataset-scan.R
@@ -33,8 +33,6 @@
#' * `filter`: A `Expression` to filter the scanned rows by, or `TRUE` (default)
#' to keep all rows.
#' * `use_threads`: logical: should scanning use multithreading? Default `TRUE`
-#' * `use_async`: logical: deprecated, this field no longer has any effect on
-#' behavior.
#' * `...`: Additional arguments, currently ignored
#' @section Methods:
#' `ScannerBuilder` has the following methods:
@@ -45,7 +43,6 @@
#' - `$UseThreads(threads)`: logical: should the scan use multithreading?
#' The method's default input is `TRUE`, but you must call the method to enable
#' multithreading because the scanner default is `FALSE`.
-#' - `$UseAsync(use_async)`: logical: deprecated, has no effect
#' - `$BatchSize(batch_size)`: integer: Maximum row count of scanned record
#' batches, default is 32K. If scanned record batches are overflowing memory
#' then this method can be called to reduce their size.
@@ -73,19 +70,11 @@ Scanner$create <- function(dataset,
projection = NULL,
filter = TRUE,
use_threads = option_use_threads(),
- use_async = NULL,
batch_size = NULL,
fragment_scan_options = NULL,
...) {
stop_if_no_datasets()
- if (!is.null(use_async)) {
- .Deprecated(msg = paste(
- "The parameter 'use_async' is deprecated",
- "and will be removed in a future release."
- ))
- }
-
if (inherits(dataset, "arrow_dplyr_query")) {
if (is_collapsed(dataset)) {
# TODO: Is there a way to get a RecordBatchReader rather than evaluating?
@@ -258,13 +247,6 @@ ScannerBuilder <- R6Class("ScannerBuilder",
dataset___ScannerBuilder__UseThreads(self, threads)
self
},
- UseAsync = function(use_async = TRUE) {
- .Deprecated(msg = paste(
- "The function 'UseAsync' is deprecated and",
- "will be removed in a future release."
- ))
- self
- },
BatchSize = function(batch_size) {
dataset___ScannerBuilder__BatchSize(self, batch_size)
self
diff --git a/r/R/deprecated.R b/r/R/deprecated.R
deleted file mode 100644
index e8848c4aa1f..00000000000
--- a/r/R/deprecated.R
+++ /dev/null
@@ -1,40 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-#' @rdname read_ipc_stream
-#' @export
-read_arrow <- function(file, ...) {
- .Deprecated(msg = "Use 'read_ipc_stream' or 'read_feather' instead.")
- if (inherits(file, "raw")) {
- read_ipc_stream(file, ...)
- } else {
- read_feather(file, ...)
- }
-}
-
-#' @rdname write_ipc_stream
-#' @export
-write_arrow <- function(x, sink, ...) {
- .Deprecated(msg = "Use 'write_ipc_stream' or 'write_feather' instead.")
- if (inherits(sink, "raw")) {
- # HACK for sparklyr
- # Note that this returns a new R raw vector, not the one passed as `sink`
- write_to_raw(x)
- } else {
- write_feather(x, sink, ...)
- }
-}
diff --git a/r/R/dplyr-union.R b/r/R/dplyr-union.R
index 3252d4cecf0..2c0120190ea 100644
--- a/r/R/dplyr-union.R
+++ b/r/R/dplyr-union.R
@@ -21,7 +21,7 @@ union.arrow_dplyr_query <- function(x, y, ...) {
x <- as_adq(x)
y <- as_adq(y)
- distinct(union_all(x, y))
+ dplyr::distinct(dplyr::union_all(x, y))
}
union.Dataset <- union.ArrowTabular <- union.RecordBatchReader <- union.arrow_dplyr_query
diff --git a/r/man/ArrayData.Rd b/r/man/ArrayData.Rd
index 383ab317d1e..2e27c6cfca5 100644
--- a/r/man/ArrayData.Rd
+++ b/r/man/ArrayData.Rd
@@ -9,14 +9,16 @@ The \code{ArrayData} class allows you to get and inspect the data
inside an \code{arrow::Array}.
}
\section{Usage}{
-\preformatted{data <- Array$create(x)$data()
+
+
+\if{html}{\out{
}}\preformatted{data <- Array$create(x)$data()
data$type
data$length
data$null_count
data$offset
data$buffers
-}
+}\if{html}{\out{
}}
}
\section{Methods}{
diff --git a/r/man/FileSystem.Rd b/r/man/FileSystem.Rd
index 1ed01644650..41d9e925140 100644
--- a/r/man/FileSystem.Rd
+++ b/r/man/FileSystem.Rd
@@ -5,6 +5,7 @@
\alias{FileSystem}
\alias{LocalFileSystem}
\alias{S3FileSystem}
+\alias{GcsFileSystem}
\alias{SubTreeFileSystem}
\title{FileSystem classes}
\description{
diff --git a/r/man/Scalar.Rd b/r/man/Scalar.Rd
index d814c623372..e9eac70776b 100644
--- a/r/man/Scalar.Rd
+++ b/r/man/Scalar.Rd
@@ -17,12 +17,14 @@ The \code{Scalar$create()} factory method instantiates a \code{Scalar} and takes
}
\section{Usage}{
-\preformatted{a <- Scalar$create(x)
+
+
+\if{html}{\out{}}\preformatted{a <- Scalar$create(x)
length(a)
print(a)
a == a
-}
+}\if{html}{\out{
}}
}
\section{Methods}{
diff --git a/r/man/Scanner.Rd b/r/man/Scanner.Rd
index d37383e1653..8f3d708c4ea 100644
--- a/r/man/Scanner.Rd
+++ b/r/man/Scanner.Rd
@@ -21,8 +21,6 @@ named list of expressions
\item \code{filter}: A \code{Expression} to filter the scanned rows by, or \code{TRUE} (default)
to keep all rows.
\item \code{use_threads}: logical: should scanning use multithreading? Default \code{TRUE}
-\item \code{use_async}: logical: deprecated, this field no longer has any effect on
-behavior.
\item \code{...}: Additional arguments, currently ignored
}
}
@@ -37,7 +35,6 @@ by \code{cols}, a character vector of column names
\item \verb{$UseThreads(threads)}: logical: should the scan use multithreading?
The method's default input is \code{TRUE}, but you must call the method to enable
multithreading because the scanner default is \code{FALSE}.
-\item \verb{$UseAsync(use_async)}: logical: deprecated, has no effect
\item \verb{$BatchSize(batch_size)}: integer: Maximum row count of scanned record
batches, default is 32K. If scanned record batches are overflowing memory
then this method can be called to reduce their size.
diff --git a/r/man/array.Rd b/r/man/array.Rd
index 371c53ac87a..5a4bc40d95e 100644
--- a/r/man/array.Rd
+++ b/r/man/array.Rd
@@ -41,12 +41,14 @@ but not limited to strings only)
}
\section{Usage}{
-\preformatted{a <- Array$create(x)
+
+
+\if{html}{\out{}}\preformatted{a <- Array$create(x)
length(a)
print(a)
a == a
-}
+}\if{html}{\out{
}}
}
\section{Methods}{
diff --git a/r/man/arrow-package.Rd b/r/man/arrow-package.Rd
index 2a0143d02e5..e1b6808f6bf 100644
--- a/r/man/arrow-package.Rd
+++ b/r/man/arrow-package.Rd
@@ -6,7 +6,7 @@
\alias{arrow-package}
\title{arrow: Integration to 'Apache' 'Arrow'}
\description{
-'Apache' 'Arrow' is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. This package provides an interface to the 'Arrow C++' library.
+'Apache' 'Arrow' \url{https://arrow.apache.org/} is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. This package provides an interface to the 'Arrow C++' library.
}
\seealso{
Useful links:
diff --git a/r/man/arrow_info.Rd b/r/man/arrow_info.Rd
index dcf13b0866a..5c3bed4aa27 100644
--- a/r/man/arrow_info.Rd
+++ b/r/man/arrow_info.Rd
@@ -7,6 +7,7 @@
\alias{arrow_with_substrait}
\alias{arrow_with_parquet}
\alias{arrow_with_s3}
+\alias{arrow_with_gcs}
\alias{arrow_with_json}
\title{Report information on the package's capabilities}
\usage{
@@ -22,6 +23,8 @@ arrow_with_parquet()
arrow_with_s3()
+arrow_with_gcs()
+
arrow_with_json()
}
\value{
diff --git a/r/man/read_ipc_stream.Rd b/r/man/read_ipc_stream.Rd
index 4cd1e4757ed..567ee9882be 100644
--- a/r/man/read_ipc_stream.Rd
+++ b/r/man/read_ipc_stream.Rd
@@ -1,12 +1,9 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/deprecated.R, R/ipc-stream.R
-\name{read_arrow}
-\alias{read_arrow}
+% Please edit documentation in R/ipc-stream.R
+\name{read_ipc_stream}
\alias{read_ipc_stream}
\title{Read Arrow IPC stream format}
\usage{
-read_arrow(file, ...)
-
read_ipc_stream(file, as_data_frame = TRUE, ...)
}
\arguments{
@@ -16,10 +13,10 @@ If a file name or URI, an Arrow \link{InputStream} will be opened and
closed when finished. If an input stream is provided, it will be left
open.}
-\item{...}{extra parameters passed to \code{read_feather()}.}
-
\item{as_data_frame}{Should the function return a \code{data.frame} (default) or
an Arrow \link{Table}?}
+
+\item{...}{extra parameters passed to \code{read_feather()}.}
}
\value{
A \code{data.frame} if \code{as_data_frame} is \code{TRUE} (the default), or an
diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd
index bcc805b5438..60c31977322 100644
--- a/r/man/write_ipc_stream.Rd
+++ b/r/man/write_ipc_stream.Rd
@@ -1,12 +1,9 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/deprecated.R, R/ipc-stream.R
-\name{write_arrow}
-\alias{write_arrow}
+% Please edit documentation in R/ipc-stream.R
+\name{write_ipc_stream}
\alias{write_ipc_stream}
\title{Write Arrow IPC stream format}
\usage{
-write_arrow(x, sink, ...)
-
write_ipc_stream(x, sink, ...)
}
\arguments{
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index 791e3ce2988..769f4d83297 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -15,49 +15,6 @@
# specific language governing permissions and limitations
# under the License.
-# Common fixtures used in many tests
-tbl <- tibble::tibble(
- int = 1:10,
- dbl = as.numeric(1:10),
- lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
- chr = letters[1:10],
- fct = factor(letters[1:10])
-)
-tab <- Table$create(tbl)
-
-test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", {
- tbl <- tibble::tibble(
- int = 1:10, dbl = as.numeric(1:10),
- lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
- chr = letters[1:10]
- )
- tab <- Table$create(!!!tbl)
-
- tf <- tempfile()
- on.exit(unlink(tf))
- expect_deprecated(
- write_arrow(tab, tf),
- "write_feather"
- )
-
- tab1 <- read_feather(tf, as_data_frame = FALSE)
- tab2 <- read_feather(normalizePath(tf), as_data_frame = FALSE)
-
- readable_file <- ReadableFile$create(tf)
- expect_deprecated(
- tab3 <- read_arrow(readable_file, as_data_frame = FALSE),
- "read_feather"
- )
- readable_file$close()
-
- mmap_file <- mmap_open(tf)
- mmap_file$close()
-
- expect_equal(tab, tab1)
- expect_equal(tab, tab2)
- expect_equal(tab, tab3)
-})
-
test_that("Table cast (ARROW-3741)", {
tab <- Table$create(x = 1:10, y = 1:10)
@@ -98,6 +55,16 @@ test_that("Table $column and $field", {
expect_error(tab$field("one"))
})
+# Common fixtures used in some of the following tests
+tbl <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10],
+ fct = factor(letters[1:10])
+)
+tab <- Table$create(tbl)
+
test_that("[, [[, $ for Table", {
expect_identical(names(tab), names(tbl))
diff --git a/r/tests/testthat/test-arrow-info.R b/r/tests/testthat/test-arrow-info.R
index 9eac6081486..addd3d740c2 100644
--- a/r/tests/testthat/test-arrow-info.R
+++ b/r/tests/testthat/test-arrow-info.R
@@ -21,3 +21,7 @@ test_that("arrow_info()", {
options(arrow.foo = FALSE)
expect_output(print(arrow_info()), "arrow.foo")
})
+
+test_that("arrow_available() is deprecated", {
+ expect_deprecated(arrow_available(), "always")
+})
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 02880314c25..8fb9f32c2ad 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -798,24 +798,6 @@ test_that("Scanner$ScanBatches", {
batches <- ds$NewScan()$Finish()$ScanBatches()
table <- Table$create(!!!batches)
expect_equal(as.data.frame(table), rbind(df1, df2))
-
- expect_deprecated(ds$NewScan()$UseAsync(TRUE), paste(
- "The function",
- "'UseAsync' is deprecated and will be removed in a future release."
- ))
- expect_deprecated(ds$NewScan()$UseAsync(FALSE), paste(
- "The function",
- "'UseAsync' is deprecated and will be removed in a future release."
- ))
-
- expect_deprecated(Scanner$create(ds, use_async = TRUE), paste(
- "The parameter 'use_async' is deprecated and will be removed in a future",
- "release."
- ))
- expect_deprecated(Scanner$create(ds, use_async = FALSE), paste(
- "The parameter 'use_async' is deprecated and will be removed in a future",
- "release."
- ))
})
test_that("Scanner$ToRecordBatchReader()", {
diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R
index 46da8c9f8c5..d7c6da0792c 100644
--- a/r/tests/testthat/test-type.R
+++ b/r/tests/testthat/test-type.R
@@ -284,3 +284,12 @@ test_that("infer_type() infers type for vctrs", {
vctrs_extension_type(vec[integer(0)])
)
})
+
+test_that("type() is deprecated", {
+ a <- Array$create(1:10)
+ expect_deprecated(
+ a_type <- type(a),
+ "infer_type"
+ )
+ expect_equal(a_type, a$type)
+})