Add R bindings for the int96 timestamp coercion unit of the Parquet Arrow
reader properties.

What this patch changes:
  * r/src/parquet.cpp: two exported C++ functions that forward to
    set_coerce_int96_timestamp_unit(unit) and coerce_int96_timestamp_unit()
    on the properties object.
  * r/src/arrowExports.cpp and r/R/arrowExports.R: the .Call glue for those
    two functions, the Rf_error fallbacks compiled when ARROW_R_WITH_PARQUET
    is not defined, and the CallEntries registrations (2 and 1 arguments).
  * r/R/parquet.R: ParquetArrowReaderProperties gains the methods
    coerce_int96_timestamp_unit() and set_coerce_int96_timestamp_unit(unit).
  * r/tests/testthat/test-parquet.R: a test that writes a table with
    use_deprecated_int96_timestamps = TRUE, reads it back with the unit set
    to TimeUnit$MILLI, and checks the resulting column type unit and values.

NOTE(review): the copy of this patch under review had lost its line breaks,
its indentation and the C++ template argument lists (for example
"const std::shared_ptr& properties"). Line structure was rebuilt from the
hunk line counts; template arguments were restored from the binding names.
Indentation (tabs in arrowExports.cpp), trailing whitespace and the template
arguments on context lines are assumptions - confirm them against the target
files before applying.

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 46948097388..2980e6e5f3a 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1364,6 +1364,14 @@ parquet___arrow___ArrowReaderProperties__set_read_dictionary <- function(propert
   invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary`, properties, column_index, read_dict))
 }
 
+parquet___arrow___ArrowReaderProperties__set_coerce_int96_timestamp_unit <- function(properties, unit) {
+  invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_coerce_int96_timestamp_unit`, properties, unit))
+}
+
+parquet___arrow___ArrowReaderProperties__get_coerce_int96_timestamp_unit <- function(properties) {
+  .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_coerce_int96_timestamp_unit`, properties)
+}
+
 parquet___arrow___FileReader__OpenFile <- function(file, props) {
   .Call(`_arrow_parquet___arrow___FileReader__OpenFile`, file, props)
 }
diff --git a/r/R/parquet.R b/r/R/parquet.R
index 11be2c051de..3a07c224ed6 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -577,6 +577,12 @@ ParquetArrowReaderProperties <- R6Class("ParquetArrowReaderProperties",
     },
     set_read_dictionary = function(column_index, read_dict) {
       parquet___arrow___ArrowReaderProperties__set_read_dictionary(self, column_index, read_dict)
+    },
+    coerce_int96_timestamp_unit = function() {
+      parquet___arrow___ArrowReaderProperties__get_coerce_int96_timestamp_unit(self)
+    },
+    set_coerce_int96_timestamp_unit = function(unit) {
+      parquet___arrow___ArrowReaderProperties__set_coerce_int96_timestamp_unit(self, unit)
     }
   ),
   active = list(
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 86b74234eca..9c7229a696a 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -5351,6 +5351,38 @@ extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__set_read_diction
 }
 #endif
 
+// parquet.cpp
+#if defined(ARROW_R_WITH_PARQUET)
+void parquet___arrow___ArrowReaderProperties__set_coerce_int96_timestamp_unit(const std::shared_ptr<parquet::ArrowReaderProperties>& properties, arrow::TimeUnit::type unit);
+extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__set_coerce_int96_timestamp_unit(SEXP properties_sexp, SEXP unit_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<parquet::ArrowReaderProperties>&>::type properties(properties_sexp);
+	arrow::r::Input<arrow::TimeUnit::type>::type unit(unit_sexp);
+	parquet___arrow___ArrowReaderProperties__set_coerce_int96_timestamp_unit(properties, unit);
+	return R_NilValue;
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__set_coerce_int96_timestamp_unit(SEXP properties_sexp, SEXP unit_sexp){
+	Rf_error("Cannot call parquet___arrow___ArrowReaderProperties__set_coerce_int96_timestamp_unit(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// parquet.cpp
+#if defined(ARROW_R_WITH_PARQUET)
+arrow::TimeUnit::type parquet___arrow___ArrowReaderProperties__get_coerce_int96_timestamp_unit(const std::shared_ptr<parquet::ArrowReaderProperties>& properties);
+extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__get_coerce_int96_timestamp_unit(SEXP properties_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<parquet::ArrowReaderProperties>&>::type properties(properties_sexp);
+	return cpp11::as_sexp(parquet___arrow___ArrowReaderProperties__get_coerce_int96_timestamp_unit(properties));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_parquet___arrow___ArrowReaderProperties__get_coerce_int96_timestamp_unit(SEXP properties_sexp){
+	Rf_error("Cannot call parquet___arrow___ArrowReaderProperties__get_coerce_int96_timestamp_unit(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // parquet.cpp
 #if defined(ARROW_R_WITH_PARQUET)
 std::shared_ptr<parquet::arrow::FileReader> parquet___arrow___FileReader__OpenFile(const std::shared_ptr<arrow::io::RandomAccessFile>& file, const std::shared_ptr<parquet::ArrowReaderProperties>& props);
@@ -7618,6 +7650,8 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads, 2},
 		{ "_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary, 2},
 		{ "_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary, 3},
+		{ "_arrow_parquet___arrow___ArrowReaderProperties__set_coerce_int96_timestamp_unit", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_coerce_int96_timestamp_unit, 2},
+		{ "_arrow_parquet___arrow___ArrowReaderProperties__get_coerce_int96_timestamp_unit", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_coerce_int96_timestamp_unit, 1},
 		{ "_arrow_parquet___arrow___FileReader__OpenFile", (DL_FUNC) &_arrow_parquet___arrow___FileReader__OpenFile, 2},
 		{ "_arrow_parquet___arrow___FileReader__ReadTable1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable1, 1},
 		{ "_arrow_parquet___arrow___FileReader__ReadTable2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable2, 2},
diff --git a/r/src/parquet.cpp b/r/src/parquet.cpp
index 5de7ca8fac4..5d5fd9b7f46 100644
--- a/r/src/parquet.cpp
+++ b/r/src/parquet.cpp
@@ -69,6 +69,20 @@ void parquet___arrow___ArrowReaderProperties__set_read_dictionary(
   properties->set_read_dictionary(column_index, read_dict);
 }
 
+// [[parquet::export]]
+void parquet___arrow___ArrowReaderProperties__set_coerce_int96_timestamp_unit(
+    const std::shared_ptr<parquet::ArrowReaderProperties>& properties,
+    arrow::TimeUnit::type unit) {
+  properties->set_coerce_int96_timestamp_unit(unit);
+}
+
+// [[parquet::export]]
+arrow::TimeUnit::type
+parquet___arrow___ArrowReaderProperties__get_coerce_int96_timestamp_unit(
+    const std::shared_ptr<parquet::ArrowReaderProperties>& properties) {
+  return properties->coerce_int96_timestamp_unit();
+}
+
 // [[parquet::export]]
 std::shared_ptr<parquet::arrow::FileReader> parquet___arrow___FileReader__OpenFile(
     const std::shared_ptr<arrow::io::RandomAccessFile>& file,
diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R
index c6533de91ec..07e6ae5caa7 100644
--- a/r/tests/testthat/test-parquet.R
+++ b/r/tests/testthat/test-parquet.R
@@ -324,3 +324,31 @@ test_that("ParquetFileWrite chunk_size calculation doesn't have integer overflow
   # but our max_chunks is respected
   expect_equal(calculate_chunk_size(101, 1, 25, 2), 51)
 })
+
+test_that("deprecated int96 timestamp unit can be specified when reading Parquet files", {
+  tf <- tempfile()
+  on.exit(unlink(tf))
+
+  table <- Table$create(
+    some_datetime = as.POSIXct("2001-01-01 12:34:56.789")
+  )
+
+  write_parquet(
+    table,
+    tf,
+    use_deprecated_int96_timestamps = TRUE
+  )
+
+  props <- ParquetArrowReaderProperties$create()
+  props$set_coerce_int96_timestamp_unit(TimeUnit$MILLI)
+  expect_identical(props$coerce_int96_timestamp_unit(), TimeUnit$MILLI)
+
+  result <- read_parquet(
+    tf,
+    as_data_frame = FALSE,
+    props = props
+  )
+
+  expect_identical(result$some_datetime$type$unit(), TimeUnit$MILLI)
+  expect_true(result$some_datetime == table$some_datetime)
+})