From 0d45908da1729f13c9a2a78a90f779fcb686a2d0 Mon Sep 17 00:00:00 2001
From: David Li
Date: Thu, 15 Apr 2021 11:03:39 -0400
Subject: [PATCH] ARROW-12408: [R] Delete Scan()
---
r/NEWS.md | 1 +
r/R/arrowExports.R | 4 ----
r/R/dataset-scan.R | 7 +------
r/src/arrowExports.cpp | 16 ----------------
r/src/dataset.cpp | 29 -----------------------------
r/tests/testthat/test-dataset.R | 10 +---------
6 files changed, 3 insertions(+), 64 deletions(-)
diff --git a/r/NEWS.md b/r/NEWS.md
index cd8c31fb8b0..312f99e3f41 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -45,6 +45,7 @@ Over 100 functions can now be called on Arrow objects inside a `dplyr` verb:
* `write_dataset()` now defaults to `format = "parquet"` and better validates the `format` argument
* Invalid input for `schema` in `open_dataset()` is now correctly handled
* Collecting 0 columns from a Dataset now no longer returns all of the columns
+* The `Scanner$Scan()` method has been removed; use `Scanner$ScanBatches()` instead
## Other improvements
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index c432a135524..51cdcf85df0 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -512,10 +512,6 @@ dataset___Scanner__head <- function(scanner, n){
.Call(`_arrow_dataset___Scanner__head`, scanner, n)
}
-dataset___Scanner__Scan <- function(scanner){
- .Call(`_arrow_dataset___Scanner__Scan`, scanner)
-}
-
dataset___Scanner__schema <- function(sc){
.Call(`_arrow_dataset___Scanner__schema`, sc)
}
diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R
index 8bec8978098..750401e1736 100644
--- a/r/R/dataset-scan.R
+++ b/r/R/dataset-scan.R
@@ -56,12 +56,7 @@
Scanner <- R6Class("Scanner", inherit = ArrowObject,
public = list(
ToTable = function() dataset___Scanner__ToTable(self),
- ScanBatches = function() dataset___Scanner__ScanBatches(self),
- Scan = function() {
- # Planned for removal in ARROW-11782
- .Deprecated("ScanBatches")
- dataset___Scanner__Scan(self)
- }
+ ScanBatches = function() dataset___Scanner__ScanBatches(self)
),
active = list(
schema = function() dataset___Scanner__schema(self)
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 42532e6c3c2..87f0130eeff 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -2015,21 +2015,6 @@ extern "C" SEXP _arrow_dataset___Scanner__head(SEXP scanner_sexp, SEXP n_sexp){
}
#endif
-// dataset.cpp
-#if defined(ARROW_R_WITH_DATASET)
-cpp11::list dataset___Scanner__Scan(const std::shared_ptr& scanner);
-extern "C" SEXP _arrow_dataset___Scanner__Scan(SEXP scanner_sexp){
-BEGIN_CPP11
- arrow::r::Input&>::type scanner(scanner_sexp);
- return cpp11::as_sexp(dataset___Scanner__Scan(scanner));
-END_CPP11
-}
-#else
-extern "C" SEXP _arrow_dataset___Scanner__Scan(SEXP scanner_sexp){
- Rf_error("Cannot call dataset___Scanner__Scan(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
-}
-#endif
-
// dataset.cpp
#if defined(ARROW_R_WITH_DATASET)
std::shared_ptr dataset___Scanner__schema(const std::shared_ptr& sc);
@@ -6747,7 +6732,6 @@ static const R_CallMethodDef CallEntries[] = {
{ "_arrow_dataset___Scanner__ToTable", (DL_FUNC) &_arrow_dataset___Scanner__ToTable, 1},
{ "_arrow_dataset___Scanner__ScanBatches", (DL_FUNC) &_arrow_dataset___Scanner__ScanBatches, 1},
{ "_arrow_dataset___Scanner__head", (DL_FUNC) &_arrow_dataset___Scanner__head, 2},
- { "_arrow_dataset___Scanner__Scan", (DL_FUNC) &_arrow_dataset___Scanner__Scan, 1},
{ "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1},
{ "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1},
{ "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6},
diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp
index af321d75db6..7c6e44964bf 100644
--- a/r/src/dataset.cpp
+++ b/r/src/dataset.cpp
@@ -450,35 +450,6 @@ std::shared_ptr dataset___Scanner__head(
return ValueOrStop(arrow::Table::FromRecordBatches(std::move(batches)));
}
-// TODO (ARROW-11782) Remove calls to Scan()
-#if defined(__GNUC__) || defined(__clang__)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#elif defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable : 4996)
-#endif
-
-// [[dataset::export]]
-cpp11::list dataset___Scanner__Scan(const std::shared_ptr& scanner) {
- auto it = ValueOrStop(scanner->Scan());
- std::vector> out;
- std::shared_ptr scan_task;
- // TODO(npr): can this iteration be parallelized?
- for (auto st : it) {
- scan_task = ValueOrStop(st);
- out.push_back(scan_task);
- }
-
- return arrow::r::to_r_list(out);
-}
-
-#if defined(__GNUC__) || defined(__clang__)
-#pragma GCC diagnostic pop
-#elif defined(_MSC_VER)
-#pragma warning(pop)
-#endif
-
// [[dataset::export]]
std::shared_ptr dataset___Scanner__schema(
const std::shared_ptr& sc) {
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index eb7408c982f..4570c1f5762 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -1345,14 +1345,6 @@ test_that("Dataset and query print methods", {
)
})
-test_that("Scanner$Scan is deprecated", {
- ds <- open_dataset(ipc_dir, partitioning = "part", format = "feather")
- expect_deprecated(
- ds$NewScan()$Finish()$Scan(),
- "ScanBatches"
- )
-})
-
test_that("Scanner$ScanBatches", {
ds <- open_dataset(ipc_dir, format = "feather")
batches <- ds$NewScan()$Finish()$ScanBatches()
@@ -1388,7 +1380,7 @@ test_that("Assembling a Dataset manually and getting a Table", {
fmt <- FileFormat$create("parquet")
factory <- FileSystemDatasetFactory$create(fs, selector, NULL, fmt, partitioning = partitioning)
expect_r6_class(factory, "FileSystemDatasetFactory")
-
+
schm <- factory$Inspect()
expect_r6_class(schm, "Schema")