From 7c8cbc2a2054d7ff202e6460c63c52599175e521 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 28 Feb 2022 14:10:02 -0800 Subject: [PATCH 01/35] First pass at R bindings --- cpp/src/arrow/config.cc | 19 +++++++++++++++++++ cpp/src/arrow/config.h | 10 ++++++++++ r/R/arrow-package.R | 8 ++++++++ r/R/arrowExports.R | 4 ++++ r/src/arrowExports.cpp | 17 +++++++++++++++++ r/src/config.cpp | 7 +++++++ 6 files changed, 65 insertions(+) diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index 93df10b097c..a501e4519de 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -21,6 +21,7 @@ #include "arrow/util/config.h" #include "arrow/util/cpu_info.h" +#include "arrow/vendored/datetime.h" namespace arrow { @@ -76,4 +77,22 @@ RuntimeInfo GetRuntimeInfo() { return info; } +Status Initialize(const ArrowGlobalOptions& options) { + if (options.tz_db_path != nullptr) { + #if !USE_OS_TZDB + try { + arrow_vendored::date::set_install(*options.tz_db_path); + arrow_vendored::date::reload_tzdb(); + } catch(const std::runtime_error& e) { + return Status::ExecutionError(e.msg); + } catch(const std::system_error& e) { + return Status::ExecutionError(e.msg); + } + #else + return Status::Invalid("Arrow was set to use OS timezone database at compile time, so it cannot be set at runtime."); + #endif // !USE_OS_TZDB + } + return Status::OK(); +} + } // namespace arrow diff --git a/cpp/src/arrow/config.h b/cpp/src/arrow/config.h index a485b91a4a5..e282f9ec7db 100644 --- a/cpp/src/arrow/config.h +++ b/cpp/src/arrow/config.h @@ -20,6 +20,7 @@ #include #include "arrow/util/config.h" // IWYU pragma: export +#include "arrow/status.h" #include "arrow/util/visibility.h" namespace arrow { @@ -77,4 +78,13 @@ const BuildInfo& GetBuildInfo(); ARROW_EXPORT RuntimeInfo GetRuntimeInfo(); +struct ArrowGlobalOptions { + /// Path to text timezone database. This is only configurable on Windows, + /// which does not have a compatible OS timezone database. + std::string* tz_db_path; +}; + +ARROW_EXPORT +Status Initialize(const ArrowGlobalOptions& options); + } // namespace arrow diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 9424fc9228d..1c470ca4404 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -65,6 +65,14 @@ # Disable multithreading on Windows # See https://issues.apache.org/jira/browse/ARROW-8379 options(arrow.use_threads = FALSE) + + # Try to set timezone database + if ("tzdb" %in% rownames(installed.packages())) { + tzdb::tzdb_initialize() + set_timezone_database(tzdb::tzdb_path()) + } else { + warning("tzdb not installed. Timezones will not be available.") + } } invisible() diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 6774ef39d4f..55886b2ea03 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -468,6 +468,10 @@ runtime_info <- function() { .Call(`_arrow_runtime_info`) } +set_timezone_database <- function(path) { + invisible(.Call(`_arrow_set_timezone_database`, path)) +} + csv___WriteOptions__initialize <- function(options) { .Call(`_arrow_csv___WriteOptions__initialize`, options) } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 5774a793813..d9d577c6966 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1840,6 +1840,22 @@ extern "C" SEXP _arrow_runtime_info(){ } #endif +// config.cpp +#if defined(ARROW_R_WITH_ARROW) +void set_timezone_database(std::string& path); +extern "C" SEXP _arrow_set_timezone_database(SEXP path_sexp){ +BEGIN_CPP11 + arrow::r::Input::type path(path_sexp); + set_timezone_database(path); + return R_NilValue; +END_CPP11 +} +#else +extern "C" SEXP _arrow_set_timezone_database(SEXP path_sexp){ + Rf_error("Cannot call set_timezone_database(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // csv.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr csv___WriteOptions__initialize(cpp11::list options); @@ -7741,6 +7757,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_compute__GetFunctionNames", (DL_FUNC) &_arrow_compute__GetFunctionNames, 0}, { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, + { "_arrow_set_timezone_database", (DL_FUNC) &_arrow_set_timezone_database, 1}, { "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1}, { "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, { "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, diff --git a/r/src/config.cpp b/r/src/config.cpp index 497843573bb..16f8068da05 100644 --- a/r/src/config.cpp +++ b/r/src/config.cpp @@ -34,4 +34,11 @@ std::vector runtime_info() { return {info.simd_level, info.detected_simd_level}; } +// [[arrow::export]] +void set_timezone_database(std::string& path) { + ArrowGlobalOptions options; + options.tz_db_path = &path; + StopIfNotOk(arrow::Initialize(options)) +} + #endif From d92b1ab84ab4b4029ff2f4f74aab774138e020f0 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Tue, 1 Mar 2022 08:42:34 -0800 Subject: [PATCH 02/35] Enable R timezone unit tests --- cpp/src/arrow/config.cc | 6 ++--- cpp/src/arrow/config.h | 2 +- r/R/arrow-package.R | 2 +- r/src/arrowExports.cpp | 14 +++++------ r/src/config.cpp | 13 ++++++---- r/tests/testthat/test-dplyr-funcs-datetime.R | 25 +------------------- 6 files changed, 21 insertions(+), 41 deletions(-) diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index a501e4519de..ed900ef9da8 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -77,16 +77,14 @@ RuntimeInfo GetRuntimeInfo() { return info; } -Status Initialize(const ArrowGlobalOptions& options) { +Status Initialize(const ArrowGlobalOptions& options) noexcept { if (options.tz_db_path != nullptr) { #if !USE_OS_TZDB try { arrow_vendored::date::set_install(*options.tz_db_path); arrow_vendored::date::reload_tzdb(); } catch(const std::runtime_error& e) { - return Status::ExecutionError(e.msg); - } catch(const std::system_error& e) { - return Status::ExecutionError(e.msg); + return Status::ExecutionError(e.what()); } #else return Status::Invalid("Arrow was set to use OS timezone database at compile time, so it cannot be set at runtime."); diff --git a/cpp/src/arrow/config.h b/cpp/src/arrow/config.h index e282f9ec7db..f7f9470ae5b 100644 --- a/cpp/src/arrow/config.h +++ b/cpp/src/arrow/config.h @@ -85,6 +85,6 @@ struct ArrowGlobalOptions { }; ARROW_EXPORT -Status Initialize(const ArrowGlobalOptions& options); +Status Initialize(const ArrowGlobalOptions& options) noexcept; } // namespace arrow diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 1c470ca4404..b9a57d6ce94 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -69,7 +69,7 @@ # Try to set timezone database if ("tzdb" %in% rownames(installed.packages())) { tzdb::tzdb_initialize() - set_timezone_database(tzdb::tzdb_path()) + set_timezone_database(tzdb::tzdb_path("text")) } else { warning("tzdb not installed. Timezones will not be available.") } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index d9d577c6966..c875206dada 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1842,10 +1842,10 @@ extern "C" SEXP _arrow_runtime_info(){ // config.cpp #if defined(ARROW_R_WITH_ARROW) -void set_timezone_database(std::string& path); +void set_timezone_database(cpp11::strings path); extern "C" SEXP _arrow_set_timezone_database(SEXP path_sexp){ BEGIN_CPP11 - arrow::r::Input::type path(path_sexp); + arrow::r::Input::type path(path_sexp); set_timezone_database(path); return R_NilValue; END_CPP11 @@ -7635,11 +7635,11 @@ return Rf_ScalarLogical( ); } static const R_CallMethodDef CallEntries[] = { -{ "_arrow_available", (DL_FUNC)& _arrow_available, 0 }, -{ "_dataset_available", (DL_FUNC)& _dataset_available, 0 }, -{ "_parquet_available", (DL_FUNC)& _parquet_available, 0 }, -{ "_s3_available", (DL_FUNC)& _s3_available, 0 }, -{ "_json_available", (DL_FUNC)& _json_available, 0 }, + { "_arrow_available", (DL_FUNC)& _arrow_available, 0 }, + { "_dataset_available", (DL_FUNC)& _dataset_available, 0 }, + { "_parquet_available", (DL_FUNC)& _parquet_available, 0 }, + { "_s3_available", (DL_FUNC)& _s3_available, 0 }, + { "_json_available", (DL_FUNC)& _json_available, 0 }, { "_arrow_test_SET_STRING_ELT", (DL_FUNC) &_arrow_test_SET_STRING_ELT, 1}, { "_arrow_is_arrow_altrep", (DL_FUNC) &_arrow_is_arrow_altrep, 1}, { "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, diff --git a/r/src/config.cpp b/r/src/config.cpp index 16f8068da05..e05ba63202c 100644 --- a/r/src/config.cpp +++ b/r/src/config.cpp @@ -35,10 +35,15 @@ std::vector runtime_info() { } // [[arrow::export]] -void set_timezone_database(std::string& path) { - ArrowGlobalOptions options; - options.tz_db_path = &path; - StopIfNotOk(arrow::Initialize(options)) +void set_timezone_database(cpp11::strings path) { + auto paths = cpp11::as_cpp>(path); + if (path.size() != 1) { + cpp11::stop("Must provide a single path to the timezone database."); + } + + arrow::ArrowGlobalOptions options; + options.tz_db_path = &paths[0]; + arrow::StopIfNotOk(arrow::Initialize(options)); } #endif diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index d0afda8912d..c5bec17df7f 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -26,12 +26,7 @@ library(dplyr, warn.conflicts = FALSE) # TODO: consider reevaluating this workaround after ARROW-12980 withr::local_timezone("UTC") -# TODO: We should test on windows once ARROW-13168 is resolved. -if (tolower(Sys.info()[["sysname"]]) == "windows") { - test_date <- as.POSIXct("2017-01-01 00:00:11.3456789", tz = "") -} else { - test_date <- as.POSIXct("2017-01-01 00:00:11.3456789", tz = "Pacific/Marquesas") -} +test_date <- as.POSIXct("2017-01-01 00:00:11.3456789", tz = "Pacific/Marquesas") test_df <- tibble::tibble( @@ -120,8 +115,6 @@ test_that("errors in strptime", { }) test_that("strftime", { - skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 - times <- tibble( datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA), date = c(as.Date("2021-01-01"), NA) @@ -209,8 +202,6 @@ test_that("strftime", { test_that("format_ISO8601", { # https://issues.apache.org/jira/projects/ARROW/issues/ARROW-15266 skip_if_not_available("re2") - # https://issues.apache.org/jira/browse/ARROW-13168 - skip_on_os("windows") times <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA)) compare_dplyr_binding( @@ -356,8 +347,6 @@ test_that("extract month from timestamp", { test_df ) - skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 - compare_dplyr_binding( .input %>% # R returns ordered factor whereas Arrow returns character @@ -434,8 +423,6 @@ test_that("extract wday from timestamp", { test_df ) - skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 - compare_dplyr_binding( .input %>% mutate(x = wday(date, label = TRUE)) %>% @@ -582,8 +569,6 @@ test_that("extract month from date", { test_df ) - skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 - compare_dplyr_binding( .input %>% # R returns ordered factor whereas Arrow returns character @@ -634,8 +619,6 @@ test_that("extract wday from date", { test_df ) - skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 - compare_dplyr_binding( .input %>% mutate(x = wday(date, label = TRUE, abbr = TRUE)) %>% @@ -704,10 +687,6 @@ test_that("leap_year mirror lubridate", { }) test_that("am/pm mirror lubridate", { - - # https://issues.apache.org/jira/browse/ARROW-13168 - skip_on_os("windows") - compare_dplyr_binding( .input %>% mutate( @@ -805,8 +784,6 @@ test_that("dst extracts daylight savings time correctly", { test_df <- tibble( dates = as.POSIXct(c("2021-02-20", "2021-07-31", "2021-10-31", "2021-01-31"), tz = "Europe/London") ) - # https://issues.apache.org/jira/browse/ARROW-13168 - skip_on_os("windows") compare_dplyr_binding( .input %>% From bf823033027c34d0c0f657d06d12c7de1f2da739 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 2 Mar 2022 12:43:43 -0800 Subject: [PATCH 03/35] Get timezone tests passing in R; locale still a problem --- .../compute/kernels/scalar_cast_string.cc | 7 ------ .../compute/kernels/scalar_temporal_unary.cc | 13 ----------- cpp/src/arrow/config.cc | 23 +++++++++++++------ cpp/src/arrow/config.h | 13 +++++++++-- r/src/config.cpp | 5 ++-- r/tests/testthat/test-dplyr-funcs-datetime.R | 16 ++++++------- 6 files changed, 37 insertions(+), 40 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index efc41622004..8ac2123d69e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -150,12 +150,6 @@ struct TemporalToStringCastFunctor { return Status::OK(); })); } else { -#ifdef _WIN32 - // TODO(ARROW-13168): - return Status::NotImplemented( - "Casting a timestamp with time zone to string is not yet supported on " - "Windows."); -#else switch (ty.unit()) { case TimeUnit::SECOND: RETURN_NOT_OK(ConvertZoned(input, timezone, &builder)); @@ -176,7 +170,6 @@ struct TemporalToStringCastFunctor { DCHECK(false); return Status::NotImplemented("Unimplemented time unit"); } -#endif } std::shared_ptr output_array; RETURN_NOT_OK(builder.Finish(&output_array)); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index ed08c367664..51b0552429f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -1046,7 +1046,6 @@ struct RoundTemporal { // ---------------------------------------------------------------------- // Convert timestamps to a string representation with an arbitrary format -#ifndef _WIN32 Result GetLocale(const std::string& locale) { try { return std::locale(locale.c_str()); @@ -1130,18 +1129,6 @@ struct Strftime { return Status::OK(); } }; -#else -// TODO(ARROW-13168) -template -struct Strftime { - static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - return Status::NotImplemented("Strftime not yet implemented on windows."); - } - static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { - return Status::NotImplemented("Strftime not yet implemented on windows."); - } -}; -#endif // ---------------------------------------------------------------------- // Convert timestamps from local timestamp without a timezone to timestamps with a diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index ed900ef9da8..21ebe7115e2 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -63,6 +63,8 @@ std::string MakeSimdLevelString(QueryFlagFunction&& query_flag) { } } +util::optional timezone_db_path; + }; // namespace const BuildInfo& GetBuildInfo() { return kBuildInfo; } @@ -74,21 +76,28 @@ RuntimeInfo GetRuntimeInfo() { MakeSimdLevelString([&](int64_t flags) { return cpu_info->IsSupported(flags); }); info.detected_simd_level = MakeSimdLevelString([&](int64_t flags) { return cpu_info->IsDetected(flags); }); + info.using_os_timezone_db = USE_OS_TZDB; +#if !USE_OS_TZDB + info.timezone_db_path = timezone_db_path; +#endif return info; } Status Initialize(const ArrowGlobalOptions& options) noexcept { - if (options.tz_db_path != nullptr) { - #if !USE_OS_TZDB + if (options.tz_db_path.has_value()) { +#if !USE_OS_TZDB try { - arrow_vendored::date::set_install(*options.tz_db_path); + arrow_vendored::date::set_install(options.tz_db_path.value()); arrow_vendored::date::reload_tzdb(); - } catch(const std::runtime_error& e) { + } catch (const std::runtime_error& e) { return Status::ExecutionError(e.what()); } - #else - return Status::Invalid("Arrow was set to use OS timezone database at compile time, so it cannot be set at runtime."); - #endif // !USE_OS_TZDB + timezone_db_path = options.tz_db_path.value(); +#else + return Status::Invalid( + "Arrow was set to use OS timezone database at compile time, so it cannot be set " + "at runtime."); +#endif // !USE_OS_TZDB } return Status::OK(); } diff --git a/cpp/src/arrow/config.h b/cpp/src/arrow/config.h index f7f9470ae5b..cb8b5f55039 100644 --- a/cpp/src/arrow/config.h +++ b/cpp/src/arrow/config.h @@ -19,8 +19,9 @@ #include -#include "arrow/util/config.h" // IWYU pragma: export #include "arrow/status.h" +#include "arrow/util/config.h" // IWYU pragma: export +#include "arrow/util/optional.h" #include "arrow/util/visibility.h" namespace arrow { @@ -63,6 +64,12 @@ struct RuntimeInfo { /// The SIMD level available on the OS and CPU std::string detected_simd_level; + + /// Whether using the OS-based timezone database + bool using_os_timezone_db; + + /// The path to the timezone database; is None + util::optional timezone_db_path; }; /// \brief Get runtime build info. @@ -81,9 +88,11 @@ RuntimeInfo GetRuntimeInfo(); struct ArrowGlobalOptions { /// Path to text timezone database. This is only configurable on Windows, /// which does not have a compatible OS timezone database. - std::string* tz_db_path; + util::optional tz_db_path; }; +// TODO: We need to tell users to run this before using timezones on Windows +// TODO: We need to run this before C++ unit tests on Windows ARROW_EXPORT Status Initialize(const ArrowGlobalOptions& options) noexcept; diff --git a/r/src/config.cpp b/r/src/config.cpp index e05ba63202c..2e8cc602c44 100644 --- a/r/src/config.cpp +++ b/r/src/config.cpp @@ -20,6 +20,7 @@ #if defined(ARROW_R_WITH_ARROW) #include +#include // [[arrow::export]] std::vector build_info() { @@ -40,9 +41,9 @@ void set_timezone_database(cpp11::strings path) { if (path.size() != 1) { cpp11::stop("Must provide a single path to the timezone database."); } - + arrow::ArrowGlobalOptions options; - options.tz_db_path = &paths[0]; + options.tz_db_path = arrow::util::make_optional(paths[0]); arrow::StopIfNotOk(arrow::Initialize(options)); } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index c5bec17df7f..31736f97494 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -115,6 +115,8 @@ test_that("errors in strptime", { }) test_that("strftime", { + skip_on_os("windows") # TODO: Fix Windows locales + # Error on Windows: Cannot find locale 'English_United States.1252' times <- tibble( datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA), date = c(as.Date("2021-01-01"), NA) @@ -340,6 +342,7 @@ test_that("extract quarter from timestamp", { }) test_that("extract month from timestamp", { + skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = month(datetime)) %>% @@ -402,6 +405,7 @@ test_that("extract day from timestamp", { }) test_that("extract wday from timestamp", { + skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = wday(datetime)) %>% @@ -525,15 +529,6 @@ test_that("extract quarter from date", { ) }) -test_that("extract month from date", { - compare_dplyr_binding( - .input %>% - mutate(x = month(date)) %>% - collect(), - test_df - ) -}) - test_that("extract isoweek from date", { compare_dplyr_binding( .input %>% @@ -561,7 +556,9 @@ test_that("extract week from date", { ) }) +# Duplicate? test_that("extract month from date", { + skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = month(date)) %>% @@ -598,6 +595,7 @@ test_that("extract day from date", { }) test_that("extract wday from date", { + skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = wday(date)) %>% From be6b344fadac0e3f7276c5d7061517faf2892c02 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 2 Mar 2022 15:36:38 -0800 Subject: [PATCH 04/35] Force Windows time locale to be C --- r/R/dplyr-funcs-datetime.R | 15 ++++++++--- r/tests/testthat/test-dplyr-funcs-datetime.R | 27 ++++++++++---------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 8f5a7689c07..71d4e5ab69a 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -15,6 +15,15 @@ # specific language governing permissions and limitations # under the License. +check_locale <- function(locale = Sys.getlocale("LC_TIME")) { + if (tolower(Sys.info()[["sysname"]]) == "windows" & locale != "C") { + # MingW C++ std::locale only supports "C" and "POSIX" + stop(paste0("On Windows, locales other than 'C' are not supported in Arrow. ", + "Consider setting `Sys.setlocale('LC_TIME', 'C')`")) + } + locale +} + register_bindings_datetime <- function() { register_binding("strptime", function(x, format = "%Y-%m-%d %H:%M:%S", tz = NULL, unit = "ms") { @@ -48,7 +57,7 @@ register_bindings_datetime <- function() { } else { ts <- x } - Expression$create("strftime", ts, options = list(format = format, locale = Sys.getlocale("LC_TIME"))) + Expression$create("strftime", ts, options = list(format = format, locale = check_locale())) }) register_binding("format_ISO8601", function(x, usetz = FALSE, precision = NULL, ...) { @@ -95,7 +104,7 @@ register_bindings_datetime <- function() { } else { format <- "%A" } - return(Expression$create("strftime", x, options = list(format = format, locale = locale))) + return(Expression$create("strftime", x, options = list(format = format, locale = check_locale(locale)))) } Expression$create("day_of_week", x, options = list(count_from_zero = FALSE, week_start = week_start)) @@ -133,7 +142,7 @@ register_bindings_datetime <- function() { } else { format <- "%B" } - return(build_expr("strftime", x, options = list(format = format, locale = locale))) + return(build_expr("strftime", x, options = list(format = format, locale = check_locale(locale)))) } build_expr("month", x) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 31736f97494..bcb8bd1ca20 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -26,6 +26,10 @@ library(dplyr, warn.conflicts = FALSE) # TODO: consider reevaluating this workaround after ARROW-12980 withr::local_timezone("UTC") +if (tolower(Sys.info()[["sysname"]]) == "windows") { + withr::local_locale(LC_TIME = "C") +} + test_date <- as.POSIXct("2017-01-01 00:00:11.3456789", tz = "Pacific/Marquesas") @@ -115,8 +119,6 @@ test_that("errors in strptime", { }) test_that("strftime", { - skip_on_os("windows") # TODO: Fix Windows locales - # Error on Windows: Cannot find locale 'English_United States.1252' times <- tibble( datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA), date = c(as.Date("2021-01-01"), NA) @@ -179,13 +181,16 @@ test_that("strftime", { # This check is due to differences in the way %c currently works in Arrow and R's strftime. # We can revisit after https://github.com/HowardHinnant/date/issues/704 is resolved. - expect_error( - times %>% - Table$create() %>% - mutate(x = strftime(datetime, format = "%c")) %>% - collect(), - "%c flag is not supported in non-C locales." - ) + # Skip on Windows because it must use C locale. + if (tolower(Sys.info()[["sysname"]]) != "windows") { + expect_error( + times %>% + Table$create() %>% + mutate(x = strftime(datetime, format = "%c")) %>% + collect(), + "%c flag is not supported in non-C locales." + ) + } # Output precision of %S depends on the input timestamp precision. # Timestamps with second precision are represented as integers while @@ -342,7 +347,6 @@ test_that("extract quarter from timestamp", { }) test_that("extract month from timestamp", { - skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = month(datetime)) %>% @@ -405,7 +409,6 @@ test_that("extract day from timestamp", { }) test_that("extract wday from timestamp", { - skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = wday(datetime)) %>% @@ -558,7 +561,6 @@ test_that("extract week from date", { # Duplicate? test_that("extract month from date", { - skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = month(date)) %>% @@ -595,7 +597,6 @@ test_that("extract day from date", { }) test_that("extract wday from date", { - skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = wday(date)) %>% From a1be1971c50027d162c429db41f4399496871f06 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 2 Mar 2022 16:04:02 -0800 Subject: [PATCH 05/35] Try to fix onLoad warning --- r/R/arrow-package.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index b9a57d6ce94..084b1e5c2db 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -67,7 +67,7 @@ options(arrow.use_threads = FALSE) # Try to set timezone database - if ("tzdb" %in% rownames(installed.packages())) { + if (length(find.package("tzdb", quiet=TRUE))) { tzdb::tzdb_initialize() set_timezone_database(tzdb::tzdb_path("text")) } else { From 89ce1bfa05b22749315812baa93bb00ffda8d743 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 2 Mar 2022 16:16:12 -0800 Subject: [PATCH 06/35] Try requireNamespace --- r/R/arrow-package.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 084b1e5c2db..f392c26a389 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -67,7 +67,7 @@ options(arrow.use_threads = FALSE) # Try to set timezone database - if (length(find.package("tzdb", quiet=TRUE))) { + if (requireNamespace("tzdb", quietly = TRUE)) { tzdb::tzdb_initialize() set_timezone_database(tzdb::tzdb_path("text")) } else { From 191885ce133fff8ce387c42e3715aed80a228f9b Mon Sep 17 00:00:00 2001 From: Will Jones Date: Tue, 8 Mar 2022 12:50:24 -0800 Subject: [PATCH 07/35] Add tzdb download to CI --- .github/workflows/cpp.yml | 6 ++++ ci/scripts/download_tz_database.sh | 30 +++++++++++++++++++ .../arrow/compute/kernels/scalar_cast_test.cc | 26 ---------------- .../compute/kernels/scalar_temporal_test.cc | 6 ---- 4 files changed, 36 insertions(+), 32 deletions(-) create mode 100644 ci/scripts/download_tz_database.sh diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index fdc29fff5e4..7401fc489c9 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -239,6 +239,9 @@ jobs: with: fetch-depth: 0 submodules: recursive + - name: Download Timezone Database + shell: bash + run: ci/scripts/download_tz_database.sh - name: Build shell: bash run: ci/scripts/cpp_build.sh $(pwd) $(pwd)/build @@ -319,6 +322,9 @@ jobs: run: | export CMAKE_BUILD_PARALLEL_LEVEL=$NUMBER_OF_PROCESSORS ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)/build" + - name: Download Timezone Database + shell: bash + run: ci/scripts/download_tz_database.sh - name: Download MinIO shell: msys2 {0} run: | diff --git a/ci/scripts/download_tz_database.sh b/ci/scripts/download_tz_database.sh new file mode 100644 index 00000000000..c5f6f61d35c --- /dev/null +++ b/ci/scripts/download_tz_database.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +# Download database +curl https://data.iana.org/time-zones/releases/tzdata2021e.tar.gz --output ~/Downloads/tzdata2021e.tar.gz + +# Extract +mkdir -p ~/Downloads/tzdata +tar --extract --file ~/Downloads/tzdata2021e.tar.gz --directory ~/Downloads/tzdata + +# Download windows +curl https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml --output ~/Downloads/tzdata/windowsZones.xml diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index f13b05ccd07..e11ddd96fbd 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -1182,11 +1182,6 @@ TEST(Cast, TimestampToDate) { } TEST(Cast, ZonedTimestampToDate) { -#ifdef _WIN32 - // TODO(ARROW-13168): we lack tzdb on Windows - GTEST_SKIP() << "ARROW-13168: no access to timezone database on Windows"; -#endif - { // See TestZoned in scalar_temporal_test.cc auto timestamps = @@ -1378,11 +1373,6 @@ TEST(Cast, TimestampToTime) { } TEST(Cast, ZonedTimestampToTime) { -#ifdef _WIN32 - // TODO(ARROW-13168): we lack tzdb on Windows - GTEST_SKIP() << "ARROW-13168: no access to timezone database on Windows"; -#endif - CheckCast(ArrayFromJSON(timestamp(TimeUnit::NANO, "Pacific/Marquesas"), kTimestampJson), ArrayFromJSON(time64(TimeUnit::NANO), R"([ 52259123456789, 50003999999999, 56480001001001, 65000000000000, @@ -1573,7 +1563,6 @@ TEST(Cast, TimestampToString) { } } -#ifndef _WIN32 TEST(Cast, TimestampWithZoneToString) { for (auto string_type : {utf8(), large_utf8()}) { CheckCast( @@ -1608,21 +1597,6 @@ TEST(Cast, TimestampWithZoneToString) { R"(["1968-11-30 13:30:44.123456789-0700", "2016-02-29 10:42:23.456789246-0700"])")); } } -#else -// TODO(ARROW-13168): we lack tzdb on Windows -TEST(Cast, TimestampWithZoneToString) { - for (auto string_type : {utf8(), large_utf8()}) { - ASSERT_RAISES(NotImplemented, Cast(ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), - "[-34226955, 1456767743]"), - CastOptions::Safe(string_type))); - - ASSERT_RAISES(NotImplemented, - Cast(ArrayFromJSON(timestamp(TimeUnit::SECOND, "America/Phoenix"), - "[-34226955, 1456767743]"), - CastOptions::Safe(string_type))); - } -} -#endif TEST(Cast, DateToDate) { auto day_32 = ArrayFromJSON(date32(), "[0, null, 100, 1, 10]"); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 38b810902ba..708cac23c0d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -564,8 +564,6 @@ TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) { CheckScalarUnary("subsecond", unit, times, float64(), subsecond); } -#ifndef _WIN32 -// TODO: We should test on windows once ARROW-13168 is resolved. TEST_F(ScalarTemporalTest, TestIsLeapYear) { auto is_leap_year_marquesas = "[false, true, false, false, false, false, false, false, false, false, false, " @@ -792,7 +790,6 @@ TEST_F(ScalarTemporalTest, TestNonexistentTimezone) { ASSERT_RAISES(Invalid, Subsecond(timestamp_array)); } } -#endif TEST_F(ScalarTemporalTest, Week) { auto unit = timestamp(TimeUnit::NANO); @@ -1607,8 +1604,6 @@ TEST_F(ScalarTemporalTest, TestTemporalDifferenceErrors) { CallFunction("weeks_between", {arr1, arr1}, &options)); } -// TODO: We should test on windows once ARROW-13168 is resolved. -#ifndef _WIN32 TEST_F(ScalarTemporalTest, TestAssumeTimezone) { std::string timezone_utc = "UTC"; std::string timezone_kolkata = "Asia/Kolkata"; @@ -2579,7 +2574,6 @@ TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalKolkata) { CheckScalarUnary("round_temporal", unit, times, unit, round_1_hours, &round_to_1_hours); CheckScalarUnary("round_temporal", unit, times, unit, round_2_hours, &round_to_2_hours); } -#endif // !_WIN32 } // namespace compute } // namespace arrow From b58d102177183e5785380870e7fc831a1c0df158 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 28 Feb 2022 14:10:02 -0800 Subject: [PATCH 08/35] First pass at R bindings --- cpp/src/arrow/config.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cpp/src/arrow/config.h b/cpp/src/arrow/config.h index cb8b5f55039..61ff49ceabd 100644 --- a/cpp/src/arrow/config.h +++ b/cpp/src/arrow/config.h @@ -88,6 +88,7 @@ RuntimeInfo GetRuntimeInfo(); struct ArrowGlobalOptions { /// Path to text timezone database. This is only configurable on Windows, /// which does not have a compatible OS timezone database. +<<<<<<< HEAD util::optional tz_db_path; }; @@ -95,5 +96,11 @@ struct ArrowGlobalOptions { // TODO: We need to run this before C++ unit tests on Windows ARROW_EXPORT Status Initialize(const ArrowGlobalOptions& options) noexcept; +== == == = std::string * tz_db_path; +}; + +ARROW_EXPORT +Status Initialize(const ArrowGlobalOptions& options); +>>>>>>> 1f1750ae6 (First pass at R bindings) } // namespace arrow From 05d1dee5ef1fa35fa823041e2e3889342ae6afc3 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Tue, 1 Mar 2022 08:42:34 -0800 Subject: [PATCH 09/35] Enable R timezone unit tests --- cpp/src/arrow/config.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cpp/src/arrow/config.h b/cpp/src/arrow/config.h index 61ff49ceabd..cb8b5f55039 100644 --- a/cpp/src/arrow/config.h +++ b/cpp/src/arrow/config.h @@ -88,7 +88,6 @@ RuntimeInfo GetRuntimeInfo(); struct ArrowGlobalOptions { /// Path to text timezone database. This is only configurable on Windows, /// which does not have a compatible OS timezone database. -<<<<<<< HEAD util::optional tz_db_path; }; @@ -96,11 +95,5 @@ struct ArrowGlobalOptions { // TODO: We need to run this before C++ unit tests on Windows ARROW_EXPORT Status Initialize(const ArrowGlobalOptions& options) noexcept; -== == == = std::string * tz_db_path; -}; - -ARROW_EXPORT -Status Initialize(const ArrowGlobalOptions& options); ->>>>>>> 1f1750ae6 (First pass at R bindings) } // namespace arrow From 48c5584f84db4e905610d89d1498319ed19cb6fa Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 2 Mar 2022 12:43:43 -0800 Subject: [PATCH 10/35] Get timezone tests passing in R; locale still a problem --- r/tests/testthat/test-dplyr-funcs-datetime.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index bcb8bd1ca20..c8bbc82258c 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -119,6 +119,8 @@ test_that("errors in strptime", { }) test_that("strftime", { + skip_on_os("windows") # TODO: Fix Windows locales + # Error on Windows: Cannot find locale 'English_United States.1252' times <- tibble( datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA), date = c(as.Date("2021-01-01"), NA) @@ -347,6 +349,7 @@ test_that("extract quarter from timestamp", { }) test_that("extract month from timestamp", { + skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = month(datetime)) %>% @@ -409,6 +412,7 @@ test_that("extract day from timestamp", { }) test_that("extract wday from timestamp", { + skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = wday(datetime)) %>% @@ -561,6 +565,7 @@ test_that("extract week from date", { # Duplicate? test_that("extract month from date", { + skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = month(date)) %>% @@ -597,6 +602,7 @@ test_that("extract day from date", { }) test_that("extract wday from date", { + skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = wday(date)) %>% From 647e3ac278a4f24b2c1ddb592ca41101d93f27a1 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 2 Mar 2022 15:36:38 -0800 Subject: [PATCH 11/35] Force Windows time locale to be C --- r/tests/testthat/test-dplyr-funcs-datetime.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index c8bbc82258c..bcb8bd1ca20 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -119,8 +119,6 @@ test_that("errors in strptime", { }) test_that("strftime", { - skip_on_os("windows") # TODO: Fix Windows locales - # Error on Windows: Cannot find locale 'English_United States.1252' times <- tibble( datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA), date = c(as.Date("2021-01-01"), NA) @@ -349,7 +347,6 @@ test_that("extract quarter from timestamp", { }) test_that("extract month from timestamp", { - skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = month(datetime)) %>% @@ -412,7 +409,6 @@ test_that("extract day from timestamp", { }) test_that("extract wday from timestamp", { - skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = wday(datetime)) %>% @@ -565,7 +561,6 @@ test_that("extract week from date", { # Duplicate? test_that("extract month from date", { - skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = month(date)) %>% @@ -602,7 +597,6 @@ test_that("extract day from date", { }) test_that("extract wday from date", { - skip_on_os("windows") # TODO: Fix Windows locales compare_dplyr_binding( .input %>% mutate(x = wday(date)) %>% From 22fc9307a3a1c2ccc1f7e9b3e58e9158ccce3555 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Tue, 8 Mar 2022 10:08:42 -0800 Subject: [PATCH 12/35] PR feedback on R bindings --- r/DESCRIPTION | 1 + r/R/dplyr-funcs-datetime.R | 11 ++++++----- r/tests/testthat/test-dplyr-funcs-datetime.R | 3 +-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 1f721468765..36a55c05b26 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -58,6 +58,7 @@ Suggests: stringr, testthat (>= 3.1.0), tibble, + tzdb, withr LinkingTo: cpp11 (>= 0.4.2) Collate: diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 71d4e5ab69a..74e53a3b380 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -15,10 +15,10 @@ # specific language governing permissions and limitations # under the License. -check_locale <- function(locale = Sys.getlocale("LC_TIME")) { +check_time_locale <- function(locale = Sys.getlocale("LC_TIME")) { if (tolower(Sys.info()[["sysname"]]) == "windows" & locale != "C") { # MingW C++ std::locale only supports "C" and "POSIX" - stop(paste0("On Windows, locales other than 'C' are not supported in Arrow. ", + stop(paste0("On Windows, time locales other than 'C' are not supported in Arrow. ", "Consider setting `Sys.setlocale('LC_TIME', 'C')`")) } locale @@ -57,7 +57,7 @@ register_bindings_datetime <- function() { } else { ts <- x } - Expression$create("strftime", ts, options = list(format = format, locale = check_locale())) + Expression$create("strftime", ts, options = list(format = format, locale = check_time_locale())) }) register_binding("format_ISO8601", function(x, usetz = FALSE, precision = NULL, ...) { @@ -104,7 +104,7 @@ register_bindings_datetime <- function() { } else { format <- "%A" } - return(Expression$create("strftime", x, options = list(format = format, locale = check_locale(locale)))) + return(Expression$create("strftime", x, options = list(format = format, locale = check_time_locale(locale)))) } Expression$create("day_of_week", x, options = list(count_from_zero = FALSE, week_start = week_start)) @@ -142,7 +142,8 @@ register_bindings_datetime <- function() { } else { format <- "%B" } - return(build_expr("strftime", x, options = list(format = format, locale = check_locale(locale)))) +<<<<<<< HEAD + return(build_expr("strftime", x, options = list(format = format, locale = check_time_locale(locale)))) } build_expr("month", x) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index bcb8bd1ca20..9e067b0a86c 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -181,8 +181,7 @@ test_that("strftime", { # This check is due to differences in the way %c currently works in Arrow and R's strftime. # We can revisit after https://github.com/HowardHinnant/date/issues/704 is resolved. - # Skip on Windows because it must use C locale. - if (tolower(Sys.info()[["sysname"]]) != "windows") { + if (Sys.getlocale("LC_TIME") != "C") { expect_error( times %>% Table$create() %>% From 1c609aa6603902edbacfad888c5c2154836249cb Mon Sep 17 00:00:00 2001 From: Will Jones Date: Tue, 8 Mar 2022 13:36:27 -0800 Subject: [PATCH 13/35] More fixes for CI scripts --- ci/appveyor-cpp-setup.bat | 11 +++++++++++ ci/scripts/download_tz_database.sh | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat index 8db35831673..f1ee07a55f1 100644 --- a/ci/appveyor-cpp-setup.bat +++ b/ci/appveyor-cpp-setup.bat @@ -115,3 +115,14 @@ powershell.exe -Command "Start-Process clcache-server" || exit /B if "%ARROW_S3%" == "ON" ( appveyor DownloadFile https://dl.min.io/server/minio/release/windows-amd64/minio.exe -FileName C:\Windows\Minio.exe || exit /B ) + + +@rem +@rem Download IANA Timezone Database for unit tests +@rem +curl https://data.iana.org/time-zones/releases/tzdata2021e.tar.gz --output %USERPROFILE%\Downloads\tzdata2021e.tar.gz +mkdir %USERPROFILE%\Downloads\tzdata +tar --extract --file %USERPROFILE%\Downloads\tzdata2021e.tar.gz --directory %USERPROFILE%\Downloads\tzdata +@rem Also need Windows timezone mapping +curl https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml ^ + --output %USERPROFILE%\Downloads\tzdata\windowsZones.xml diff --git a/ci/scripts/download_tz_database.sh b/ci/scripts/download_tz_database.sh index c5f6f61d35c..c4095ef9c2d 100644 --- a/ci/scripts/download_tz_database.sh +++ b/ci/scripts/download_tz_database.sh @@ -26,5 +26,5 @@ curl https://data.iana.org/time-zones/releases/tzdata2021e.tar.gz --output ~/Dow mkdir -p ~/Downloads/tzdata tar --extract --file ~/Downloads/tzdata2021e.tar.gz --directory ~/Downloads/tzdata -# Download windows +# Download Windows timezone mapping curl https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml --output ~/Downloads/tzdata/windowsZones.xml From 64af70c42c471ff0396cbf52623bda037f62c2ab Mon Sep 17 00:00:00 2001 From: Will Jones Date: Tue, 8 Mar 2022 14:17:17 -0800 Subject: [PATCH 14/35] Add section to developer docs for downloading --- ci/appveyor-cpp-setup.bat | 2 ++ docs/source/developers/cpp/windows.rst | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat index f1ee07a55f1..64091957961 100644 --- a/ci/appveyor-cpp-setup.bat +++ b/ci/appveyor-cpp-setup.bat @@ -120,9 +120,11 @@ if "%ARROW_S3%" == "ON" ( @rem @rem Download IANA Timezone Database for unit tests @rem +@rem (Doc section: Download timezone database) curl https://data.iana.org/time-zones/releases/tzdata2021e.tar.gz --output %USERPROFILE%\Downloads\tzdata2021e.tar.gz mkdir %USERPROFILE%\Downloads\tzdata tar --extract --file %USERPROFILE%\Downloads\tzdata2021e.tar.gz --directory %USERPROFILE%\Downloads\tzdata @rem Also need Windows timezone mapping curl https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml ^ --output %USERPROFILE%\Downloads\tzdata\windowsZones.xml +@rem (Doc section: Download timezone database) \ No newline at end of file diff --git a/docs/source/developers/cpp/windows.rst b/docs/source/developers/cpp/windows.rst index b2c0c238ff2..3573ee4cfc8 100644 --- a/docs/source/developers/cpp/windows.rst +++ b/docs/source/developers/cpp/windows.rst @@ -362,6 +362,22 @@ suppress dllimport/dllexport marking of symbols. Projects that statically link against Arrow on Windows additionally need this definition. The Unix builds do not use the macro. +Downloading the Timezone Database +================================= + +To run some of the compute unit tests on Windows, the IANA timezone database +and the Windows timezone mapping need to be downloaded first. To download +them to the default search directory (in the user Downloads directory), use +the following section of the `ci/appveyor-setup.bat +`_ +script: + +.. literalinclude:: ../../../ci/appveyor-cpp-setup.bat + :language: cpp + :start-after: @rem (Doc section: Download timezone database) + :end-before: @rem (Doc section: Download timezone database) + :linenos: + Replicating Appveyor Builds =========================== From 410c2e5b93965bb2214da6608e4edcb2d6fd3c10 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 9 Mar 2022 14:18:29 -0800 Subject: [PATCH 15/35] Add new test to illuminate issue --- .../compute/kernels/scalar_temporal_test.cc | 13 ++++++++++ cpp/src/arrow/public_api_test.cc | 24 +++++++++++++++++++ r/R/dplyr-funcs-datetime.R | 1 - 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 708cac23c0d..bedc958e494 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -1883,6 +1883,19 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { expected, &options); } +TEST_F(ScalarTemporalTest, PlatformLocale) { + if (!LocaleExists("fr_FR.UTF-8")) { + GTEST_SKIP() << "locale 'fr_FR.UTF-8' doesn't exist on this system"; + } + auto locale = std::locale("fr_FR.UTF-8"); + std::stringstream ss; + ss.imbue(locale); + auto dt = ScalarFromJSON(timestamp(TimeUnit::MILLI, "UTC"), "2021-08-18T15:11:50.456"); + ss << dt; + auto result = ss.str(); + EXPECT_EQ(result, "01 janvier 1970 00:00:59,123"); +} + TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) { auto options = StrftimeOptions("%d %B %Y %H:%M:%S", "non-existent"); const char* seconds = R"(["1970-01-01T00:00:59", null])"; diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index f6f78295499..8b28bcec130 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -85,6 +85,7 @@ #include #include +#include "arrow/testing/gtest_util.h" namespace arrow { @@ -103,4 +104,27 @@ TEST(Misc, BuildInfo) { ASSERT_THAT(info.full_so_version, ::testing::HasSubstr(info.so_version)); } +TEST(Misc, SetTimzoneConfig) { +#ifndef _WIN32 + GTEST_SKIP() << "Can only set the Timezone database on Windows"; +#else + // Create a tmp directory + ASSERT_OK_AND_ASSIGN(auto tempdir, arrow::internal::TemporaryDir::Make("tzdata")); + + // Validate that setting tzdb to that dir fails + arrow::ArrowGlobalOptions options = {util::make_optional(tempdir->path().ToString())}; + ASSERT_NOT_OK(arrow::Initialize(options)); + + // Copy tzdb data from ~/Downloads + auto fs = std::make_shared(); + auto selector = arrow::fs::FileSelector(); + selector.base_dir = "~/Downloads/tzdata"; + selector.recursive = true; + ASSERT_OK(arrow::fs::CopyFiles(fs, selector, fs, tempdir->path().ToString())); + + // Validate that tzdb is working + ASSERT_OK(arrow::Initialize(options)); +#endif // _WIN32 +} + } // namespace arrow diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 74e53a3b380..88e772884ac 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -142,7 +142,6 @@ register_bindings_datetime <- function() { } else { format <- "%B" } -<<<<<<< HEAD return(build_expr("strftime", x, options = list(format = format, locale = check_time_locale(locale)))) } From 5a33d3034923bcc326213ddf28c147fd8d12018f Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 10 Mar 2022 11:40:12 -0800 Subject: [PATCH 16/35] Make tzdb test skip if not found in downloads --- cpp/src/arrow/public_api_test.cc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index 8b28bcec130..fa678f65e97 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -108,6 +108,16 @@ TEST(Misc, SetTimzoneConfig) { #ifndef _WIN32 GTEST_SKIP() << "Can only set the Timezone database on Windows"; #else + auto fs = std::make_shared(); + auto home_raw = std::getenv("USERPROFILE"); + std::string home = home_raw == nullptr ? "~" : std::string(home_raw); + ASSERT_OK_AND_ASSIGN(std::string tzdata_dir, + fs->NormalizePath(home + "\\Downloads\\tzdata")); + ASSERT_OK_AND_ASSIGN(auto tzdata_path, + arrow::internal::PlatformFilename::FromString(tzdata_dir)); + if (!arrow::internal::FileExists(tzdata_path).ValueOr(false)) { + GTEST_SKIP() << "Couldn't find timezone database in expected dir: " << tzdata_dir; + } // Create a tmp directory ASSERT_OK_AND_ASSIGN(auto tempdir, arrow::internal::TemporaryDir::Make("tzdata")); @@ -116,9 +126,8 @@ TEST(Misc, SetTimzoneConfig) { ASSERT_NOT_OK(arrow::Initialize(options)); // Copy tzdb data from ~/Downloads - auto fs = std::make_shared(); auto selector = arrow::fs::FileSelector(); - selector.base_dir = "~/Downloads/tzdata"; + selector.base_dir = tzdata_dir; selector.recursive = true; ASSERT_OK(arrow::fs::CopyFiles(fs, selector, fs, tempdir->path().ToString())); From ab5d0387c0bf61989a9b0fef4580310fac4d41b5 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 10 Mar 2022 14:27:00 -0800 Subject: [PATCH 17/35] Get other test running --- .../compute/kernels/scalar_temporal_test.cc | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index bedc958e494..176007381ca 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -1887,13 +1887,17 @@ TEST_F(ScalarTemporalTest, PlatformLocale) { if (!LocaleExists("fr_FR.UTF-8")) { GTEST_SKIP() << "locale 'fr_FR.UTF-8' doesn't exist on this system"; } - auto locale = std::locale("fr_FR.UTF-8"); - std::stringstream ss; - ss.imbue(locale); - auto dt = ScalarFromJSON(timestamp(TimeUnit::MILLI, "UTC"), "2021-08-18T15:11:50.456"); - ss << dt; - auto result = ss.str(); - EXPECT_EQ(result, "01 janvier 1970 00:00:59,123"); + using namespace std::chrono; + using namespace arrow_vendored::date; + zoned_time d{"America/New_York", local_days{August/18/2021}}; + auto locale_str = "fr_FR.UTF-8"; + std::locale loc(locale_str); + std::ostringstream oss; + oss.imbue(loc); + oss << format("%d %B %Y", d); + oss.clear(); + const auto s = oss.str(); + EXPECT_EQ(s, "18 août 2021"); } TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) { From 668c8ccf784449037dbc7014fa6340e3f1bdf3a5 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 10 Mar 2022 15:56:50 -0800 Subject: [PATCH 18/35] Fix some failures --- cpp/src/arrow/compute/kernels/scalar_temporal_test.cc | 4 ++-- cpp/src/arrow/public_api_test.cc | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 176007381ca..6f6a1061bac 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -1889,12 +1889,12 @@ TEST_F(ScalarTemporalTest, PlatformLocale) { } using namespace std::chrono; using namespace arrow_vendored::date; - zoned_time d{"America/New_York", local_days{August/18/2021}}; + zoned_time d{"America/New_York", local_days{August / 18 / 2021}}; auto locale_str = "fr_FR.UTF-8"; std::locale loc(locale_str); std::ostringstream oss; oss.imbue(loc); - oss << format("%d %B %Y", d); + to_stream(oss, "%d %B %Y", d); oss.clear(); const auto s = oss.str(); EXPECT_EQ(s, "18 août 2021"); diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index fa678f65e97..5d4d68bd672 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -107,6 +107,9 @@ TEST(Misc, BuildInfo) { TEST(Misc, SetTimzoneConfig) { #ifndef _WIN32 GTEST_SKIP() << "Can only set the Timezone database on Windows"; +#else +#ifndef ARROW_FILESYSTEM + GTEST_SKIP() << "Need filesystem support to test timezone config."; #else auto fs = std::make_shared(); auto home_raw = std::getenv("USERPROFILE"); @@ -133,6 +136,7 @@ TEST(Misc, SetTimzoneConfig) { // Validate that tzdb is working ASSERT_OK(arrow::Initialize(options)); +#endif // ARROW_FILESYSTEM #endif // _WIN32 } From 32df619d2ec8ed6303496ecdd7267a37357d4279 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 11 Mar 2022 14:07:30 -0800 Subject: [PATCH 19/35] Just skip the locale test for now --- .../compute/kernels/scalar_temporal_test.cc | 20 +++---------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 6f6a1061bac..70570134071 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -1870,6 +1870,9 @@ TEST_F(ScalarTemporalTest, StrftimeCLocale) { } TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { +#ifdef WIN32 + GTEST_SKIP() << "There is a known bug in strftime for locales on Windows (ARROW-15922)"; +#endif if (!LocaleExists("fr_FR.UTF-8")) { GTEST_SKIP() << "locale 'fr_FR.UTF-8' doesn't exist on this system"; } @@ -1883,23 +1886,6 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { expected, &options); } -TEST_F(ScalarTemporalTest, PlatformLocale) { - if (!LocaleExists("fr_FR.UTF-8")) { - GTEST_SKIP() << "locale 'fr_FR.UTF-8' doesn't exist on this system"; - } - using namespace std::chrono; - using namespace arrow_vendored::date; - zoned_time d{"America/New_York", local_days{August / 18 / 2021}}; - auto locale_str = "fr_FR.UTF-8"; - std::locale loc(locale_str); - std::ostringstream oss; - oss.imbue(loc); - to_stream(oss, "%d %B %Y", d); - oss.clear(); - const auto s = oss.str(); - EXPECT_EQ(s, "18 août 2021"); -} - TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) { auto options = StrftimeOptions("%d %B %Y %H:%M:%S", "non-existent"); const char* seconds = R"(["1970-01-01T00:00:59", null])"; From 7ebf6a821d41218a6bb7c73f90a635def73d2b11 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 11 Mar 2022 14:26:20 -0800 Subject: [PATCH 20/35] Cleanup --- cpp/src/arrow/config.h | 5 ++--- r/tests/testthat/test-dplyr-funcs-datetime.R | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/config.h b/cpp/src/arrow/config.h index cb8b5f55039..9db4a7dcecf 100644 --- a/cpp/src/arrow/config.h +++ b/cpp/src/arrow/config.h @@ -66,9 +66,10 @@ struct RuntimeInfo { std::string detected_simd_level; /// Whether using the OS-based timezone database + /// This is set at compile-time. bool using_os_timezone_db; - /// The path to the timezone database; is None + /// The path to the timezone database; by default None. util::optional timezone_db_path; }; @@ -91,8 +92,6 @@ struct ArrowGlobalOptions { util::optional tz_db_path; }; -// TODO: We need to tell users to run this before using timezones on Windows -// TODO: We need to run this before C++ unit tests on Windows ARROW_EXPORT Status Initialize(const ArrowGlobalOptions& options) noexcept; diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 9e067b0a86c..1e3ad628be4 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -558,7 +558,6 @@ test_that("extract week from date", { ) }) -# Duplicate? test_that("extract month from date", { compare_dplyr_binding( .input %>% @@ -585,7 +584,6 @@ test_that("extract month from date", { ) }) - test_that("extract day from date", { compare_dplyr_binding( .input %>% From 42f0a6a8e050e6c0414016332a90c83d5e6fafcc Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 14 Mar 2022 08:19:10 -0700 Subject: [PATCH 21/35] Adjust script to avoid tar Windows drive issue --- ci/appveyor-cpp-setup.bat | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat index 64091957961..936306296f6 100644 --- a/ci/appveyor-cpp-setup.bat +++ b/ci/appveyor-cpp-setup.bat @@ -121,9 +121,10 @@ if "%ARROW_S3%" == "ON" ( @rem Download IANA Timezone Database for unit tests @rem @rem (Doc section: Download timezone database) -curl https://data.iana.org/time-zones/releases/tzdata2021e.tar.gz --output %USERPROFILE%\Downloads\tzdata2021e.tar.gz -mkdir %USERPROFILE%\Downloads\tzdata -tar --extract --file %USERPROFILE%\Downloads\tzdata2021e.tar.gz --directory %USERPROFILE%\Downloads\tzdata +curl https://data.iana.org/time-zones/releases/tzdata2021e.tar.gz --output tzdata.tar.gz +mkdir tzdata +tar --extract --file tzdata.tar.gz --directory tzdata +move tzdata %USERPROFILE%\Downloads\tzdata @rem Also need Windows timezone mapping curl https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml ^ --output %USERPROFILE%\Downloads\tzdata\windowsZones.xml From baa22639763e8a00dcdd1155553bc6829758beee Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 14 Mar 2022 08:35:17 -0700 Subject: [PATCH 22/35] Do a better job of skipping test --- cpp/src/arrow/compute/kernels/scalar_temporal_test.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 70570134071..584f67125f3 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -1872,7 +1872,7 @@ TEST_F(ScalarTemporalTest, StrftimeCLocale) { TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { #ifdef WIN32 GTEST_SKIP() << "There is a known bug in strftime for locales on Windows (ARROW-15922)"; -#endif +#else if (!LocaleExists("fr_FR.UTF-8")) { GTEST_SKIP() << "locale 'fr_FR.UTF-8' doesn't exist on this system"; } @@ -1884,6 +1884,7 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { ["01 janvier 1970 00:00:59,123", "18 août 2021 15:11:50,456", null])"; CheckScalarUnary("strftime", timestamp(TimeUnit::MILLI, "UTC"), milliseconds, utf8(), expected, &options); +#endif } TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) { From 7c0908036e25d7d4686a558b39eac0e88fe95350 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 14 Mar 2022 09:54:49 -0700 Subject: [PATCH 23/35] Document for users how to get tzdb --- docs/source/cpp/api/support.rst | 10 ++++++++++ docs/source/cpp/build_system.rst | 21 +++++++++++++++++++++ docs/source/developers/cpp/windows.rst | 5 ++--- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/docs/source/cpp/api/support.rst b/docs/source/cpp/api/support.rst index 85374d27fb6..ac1c6f6d9c3 100644 --- a/docs/source/cpp/api/support.rst +++ b/docs/source/cpp/api/support.rst @@ -73,6 +73,16 @@ introduced API). ``"7.0.1"``. +Runtime Configuration +===================== + +.. doxygenstruct:: arrow::ArrowGlobalOptions + :members: + + +.. doxygenfunction:: arrow::Initialize + + Error return and reporting ========================== diff --git a/docs/source/cpp/build_system.rst b/docs/source/cpp/build_system.rst index fbdd2eafbd2..ba28a10f902 100644 --- a/docs/source/cpp/build_system.rst +++ b/docs/source/cpp/build_system.rst @@ -163,3 +163,24 @@ can control the source of each dependency and whether it is statically or dynamically linked. See :doc:`/developers/cpp/building` for instructions. Or alternatively, use Arrow from a package manager such as Conda or vcpkg which will manage consistent versions of Arrow and its dependencies. + + +Runtime Dependencies +==================== + +While Arrow uses the OS-provided timezone database on Linux and Mac OS, it +requires a user-provided database on Windows. You must download and extract the +text version of the IANA timezone database and add the Windows timezone mapping +XML. To download, you can use the following batch script: + +.. literalinclude:: ../../../ci/appveyor-cpp-setup.bat + :language: cmd + :start-after: @rem (Doc section: Download timezone database) + :end-before: @rem (Doc section: Download timezone database) + +By default, the timezone database will be detected at ``%USERPROFILE%\Downloads\tzdata``, +but you can set a custom path at runtime in :struct:`arrow::ArrowGlobalOptions`:: + + arrow::ArrowGlobalOptions options; + options.tz_db_path = "path/to/tzdata"; + ARROW_RETURN_NOT_OK(arrow::Initialize(options)); diff --git a/docs/source/developers/cpp/windows.rst b/docs/source/developers/cpp/windows.rst index 3573ee4cfc8..9e02ef7c460 100644 --- a/docs/source/developers/cpp/windows.rst +++ b/docs/source/developers/cpp/windows.rst @@ -372,11 +372,10 @@ the following section of the `ci/appveyor-setup.bat `_ script: -.. literalinclude:: ../../../ci/appveyor-cpp-setup.bat - :language: cpp +.. literalinclude:: ../../../../ci/appveyor-cpp-setup.bat + :language: cmd :start-after: @rem (Doc section: Download timezone database) :end-before: @rem (Doc section: Download timezone database) - :linenos: Replicating Appveyor Builds =========================== From d883a481e6ba66dffb54429c6b212cd3f1b38a9e Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 21 Mar 2022 13:31:47 -0700 Subject: [PATCH 24/35] Add underscore to WIN32 --- cpp/src/arrow/compute/kernels/scalar_temporal_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 584f67125f3..e657903d50b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -1870,7 +1870,7 @@ TEST_F(ScalarTemporalTest, StrftimeCLocale) { } TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { -#ifdef WIN32 +#ifdef _WIN32 GTEST_SKIP() << "There is a known bug in strftime for locales on Windows (ARROW-15922)"; #else if (!LocaleExists("fr_FR.UTF-8")) { From 60ccf65cd5fe8deed37360aa090a4f774bb9e550 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 23 Mar 2022 10:57:41 -0700 Subject: [PATCH 25/35] Update docs/source/cpp/build_system.rst Co-authored-by: Antoine Pitrou --- docs/source/cpp/build_system.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/cpp/build_system.rst b/docs/source/cpp/build_system.rst index ba28a10f902..f1deeb2f1ad 100644 --- a/docs/source/cpp/build_system.rst +++ b/docs/source/cpp/build_system.rst @@ -168,7 +168,7 @@ will manage consistent versions of Arrow and its dependencies. Runtime Dependencies ==================== -While Arrow uses the OS-provided timezone database on Linux and Mac OS, it +While Arrow uses the OS-provided timezone database on Linux and macOS, it requires a user-provided database on Windows. You must download and extract the text version of the IANA timezone database and add the Windows timezone mapping XML. To download, you can use the following batch script: From ece8ce08554e6921172ae6a3d48421eb1131fa8a Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 23 Mar 2022 10:59:56 -0700 Subject: [PATCH 26/35] Update cpp/src/arrow/public_api_test.cc Co-authored-by: Antoine Pitrou --- cpp/src/arrow/public_api_test.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index 5d4d68bd672..c4313312f43 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -107,8 +107,7 @@ TEST(Misc, BuildInfo) { TEST(Misc, SetTimzoneConfig) { #ifndef _WIN32 GTEST_SKIP() << "Can only set the Timezone database on Windows"; -#else -#ifndef ARROW_FILESYSTEM +#elif !defined(ARROW_FILESYSTEM) GTEST_SKIP() << "Need filesystem support to test timezone config."; #else auto fs = std::make_shared(); From 7da26f6c60d5b32e17d7794f9be873aba45197a9 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 23 Mar 2022 13:24:50 -0700 Subject: [PATCH 27/35] Address PR feedback --- cpp/src/arrow/config.cc | 16 +++++++++------- cpp/src/arrow/config.h | 6 +++--- cpp/src/arrow/public_api_test.cc | 14 ++++++++++++-- r/src/config.cpp | 2 +- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index 21ebe7115e2..5622fba59c1 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -79,24 +79,26 @@ RuntimeInfo GetRuntimeInfo() { info.using_os_timezone_db = USE_OS_TZDB; #if !USE_OS_TZDB info.timezone_db_path = timezone_db_path; +#else + info.timezone_db_path = util::optional(); #endif return info; } -Status Initialize(const ArrowGlobalOptions& options) noexcept { - if (options.tz_db_path.has_value()) { +Status Initialize(const GlobalOptions& options) noexcept { + if (options.timezone_db_path.has_value()) { #if !USE_OS_TZDB try { - arrow_vendored::date::set_install(options.tz_db_path.value()); + arrow_vendored::date::set_install(options.timezone_db_path.value()); arrow_vendored::date::reload_tzdb(); } catch (const std::runtime_error& e) { - return Status::ExecutionError(e.what()); + return Status::IOError(e.what()); } - timezone_db_path = options.tz_db_path.value(); + timezone_db_path = options.timezone_db_path.value(); #else return Status::Invalid( - "Arrow was set to use OS timezone database at compile time, so it cannot be set " - "at runtime."); + "Arrow was set to use OS timezone database at compile time, so a downloaded database " + "cannot be provided at runtime."); #endif // !USE_OS_TZDB } return Status::OK(); diff --git a/cpp/src/arrow/config.h b/cpp/src/arrow/config.h index 9db4a7dcecf..87e31cc456a 100644 --- a/cpp/src/arrow/config.h +++ b/cpp/src/arrow/config.h @@ -86,13 +86,13 @@ const BuildInfo& GetBuildInfo(); ARROW_EXPORT RuntimeInfo GetRuntimeInfo(); -struct ArrowGlobalOptions { +struct GlobalOptions { /// Path to text timezone database. This is only configurable on Windows, /// which does not have a compatible OS timezone database. - util::optional tz_db_path; + util::optional timezone_db_path; }; ARROW_EXPORT -Status Initialize(const ArrowGlobalOptions& options) noexcept; +Status Initialize(const GlobalOptions& options) noexcept; } // namespace arrow diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index c4313312f43..fb0c30cfe63 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -104,7 +104,17 @@ TEST(Misc, BuildInfo) { ASSERT_THAT(info.full_so_version, ::testing::HasSubstr(info.so_version)); } -TEST(Misc, SetTimzoneConfig) { +class ConfigTest : public ::testing::Test { + protected: + void TearDown() override { + // Reset global options to defaults + arrow::GlobalOptions default_options; + default_options.timezone_db_path = util::optional(); + ASSERT_OK(arrow::Initialize(default_options)); + } +}; + +TEST_F(ConfigTest, SetTimezoneConfig) { #ifndef _WIN32 GTEST_SKIP() << "Can only set the Timezone database on Windows"; #elif !defined(ARROW_FILESYSTEM) @@ -124,7 +134,7 @@ TEST(Misc, SetTimzoneConfig) { ASSERT_OK_AND_ASSIGN(auto tempdir, arrow::internal::TemporaryDir::Make("tzdata")); // Validate that setting tzdb to that dir fails - arrow::ArrowGlobalOptions options = {util::make_optional(tempdir->path().ToString())}; + arrow::GlobalOptions options = {util::make_optional(tempdir->path().ToString())}; ASSERT_NOT_OK(arrow::Initialize(options)); // Copy tzdb data from ~/Downloads diff --git a/r/src/config.cpp b/r/src/config.cpp index 2e8cc602c44..b6c66de7408 100644 --- a/r/src/config.cpp +++ b/r/src/config.cpp @@ -43,7 +43,7 @@ void set_timezone_database(cpp11::strings path) { } arrow::ArrowGlobalOptions options; - options.tz_db_path = arrow::util::make_optional(paths[0]); + options.timezone_db_path = arrow::util::make_optional(paths[0]); arrow::StopIfNotOk(arrow::Initialize(options)); } From 2471139344f940ace4fd8e4aaa86885b54c9014d Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 23 Mar 2022 13:50:08 -0700 Subject: [PATCH 28/35] Link docs --- docs/source/cpp/build_system.rst | 2 ++ docs/source/developers/cpp/windows.rst | 12 ++---------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/docs/source/cpp/build_system.rst b/docs/source/cpp/build_system.rst index f1deeb2f1ad..d91d070e092 100644 --- a/docs/source/cpp/build_system.rst +++ b/docs/source/cpp/build_system.rst @@ -165,6 +165,8 @@ alternatively, use Arrow from a package manager such as Conda or vcpkg which will manage consistent versions of Arrow and its dependencies. +.. _download-timezone-database: + Runtime Dependencies ==================== diff --git a/docs/source/developers/cpp/windows.rst b/docs/source/developers/cpp/windows.rst index 9e02ef7c460..b6111079c3b 100644 --- a/docs/source/developers/cpp/windows.rst +++ b/docs/source/developers/cpp/windows.rst @@ -366,16 +366,8 @@ Downloading the Timezone Database ================================= To run some of the compute unit tests on Windows, the IANA timezone database -and the Windows timezone mapping need to be downloaded first. To download -them to the default search directory (in the user Downloads directory), use -the following section of the `ci/appveyor-setup.bat -`_ -script: - -.. literalinclude:: ../../../../ci/appveyor-cpp-setup.bat - :language: cmd - :start-after: @rem (Doc section: Download timezone database) - :end-before: @rem (Doc section: Download timezone database) +and the Windows timezone mapping need to be downloaded first. See +:ref:`download-timezone-database`. Replicating Appveyor Builds =========================== From 843cdb6fe2ce62bb1424bec06ccac86f032abec3 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 24 Mar 2022 11:07:52 -0700 Subject: [PATCH 29/35] Use environment variable for public api tests --- cpp/src/arrow/public_api_test.cc | 43 +++++++++++++++++++------------- cpp/src/arrow/testing/util.cc | 9 +++++++ cpp/src/arrow/testing/util.h | 4 +++ 3 files changed, 39 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index fb0c30cfe63..ae62d263a83 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -86,6 +86,7 @@ #include #include #include "arrow/testing/gtest_util.h" +#include "arrow/testing/util.h" namespace arrow { @@ -104,29 +105,38 @@ TEST(Misc, BuildInfo) { ASSERT_THAT(info.full_so_version, ::testing::HasSubstr(info.so_version)); } -class ConfigTest : public ::testing::Test { - protected: - void TearDown() override { - // Reset global options to defaults - arrow::GlobalOptions default_options; - default_options.timezone_db_path = util::optional(); - ASSERT_OK(arrow::Initialize(default_options)); - } -}; - -TEST_F(ConfigTest, SetTimezoneConfig) { +// class ConfigTest : public ::testing::Test { +// protected: +// void TearDown() override { +// // Reset global options to defaults +// arrow::GlobalOptions default_options; +// default_options.timezone_db_path = util::optional(); +// ASSERT_OK(arrow::Initialize(default_options)); +// } +// }; + +TEST(Misc, SetTimezoneConfig) { #ifndef _WIN32 GTEST_SKIP() << "Can only set the Timezone database on Windows"; #elif !defined(ARROW_FILESYSTEM) GTEST_SKIP() << "Need filesystem support to test timezone config."; #else auto fs = std::make_shared(); - auto home_raw = std::getenv("USERPROFILE"); - std::string home = home_raw == nullptr ? "~" : std::string(home_raw); - ASSERT_OK_AND_ASSIGN(std::string tzdata_dir, - fs->NormalizePath(home + "\\Downloads\\tzdata")); + + Result tzdata_result = GetTestTimezoneDatabaseRoot(); + std::string tzdata_dir; + if (tzdata_result.ok()) { + tzdata_dir = tzdata_result.ValueUnsafe(); + } else { + auto home_raw = std::getenv("USERPROFILE"); + std::string home = home_raw == nullptr ? "~" : std::string(home_raw); + ASSERT_OK_AND_ASSIGN(tzdata_dir, fs->NormalizePath(home + "\\Downloads\\tzdata")); + } + ASSERT_OK_AND_ASSIGN(tzdata_dir, fs->NormalizePath(tzdata_dir)); ASSERT_OK_AND_ASSIGN(auto tzdata_path, arrow::internal::PlatformFilename::FromString(tzdata_dir)); + + if (!arrow::internal::FileExists(tzdata_path).ValueOr(false)) { GTEST_SKIP() << "Couldn't find timezone database in expected dir: " << tzdata_dir; } @@ -145,8 +155,7 @@ TEST_F(ConfigTest, SetTimezoneConfig) { // Validate that tzdb is working ASSERT_OK(arrow::Initialize(options)); -#endif // ARROW_FILESYSTEM -#endif // _WIN32 +#endif } } // namespace arrow diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index 19917185130..84fd68687a5 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -108,6 +108,15 @@ Status GetTestResourceRoot(std::string* out) { return Status::OK(); } +Result GetTestTimezoneDatabaseRoot() { + const char* c_root = std::getenv("ARROW_TIMEZONE_DATABASE"); + if (!c_root) { + return Status::IOError( + "Test resources not found, set ARROW_TIMEZONE_DATABASE"); + } + return std::string(c_root); +} + int GetListenPort() { // Get a new available port number by binding a socket to an ephemeral port // and then closing it. Since ephemeral port allocation tends to avoid diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h index 786ab3814aa..39069a92bf3 100644 --- a/cpp/src/arrow/testing/util.h +++ b/cpp/src/arrow/testing/util.h @@ -110,6 +110,10 @@ UnionTypeFactories() { // Status ARROW_TESTING_EXPORT Status GetTestResourceRoot(std::string*); +// Return the value of the ARROW_TIMEZONE_DATABASE environment variable or return error +// Status +ARROW_TESTING_EXPORT Result GetTestTimezoneDatabaseRoot(); + // Get a TCP port number to listen on. This is a different number every time, // as reusing the same port across tests can produce spurious bind errors on // Windows. From 29dc875f0610fd5ff6ce18970ad6721eb20e558b Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 24 Mar 2022 14:12:13 -0700 Subject: [PATCH 30/35] Allow tests set location via environment variable --- .../arrow/compute/kernels/scalar_cast_test.cc | 17 ++++++++++++--- .../compute/kernels/scalar_temporal_test.cc | 9 ++++++++ cpp/src/arrow/public_api_test.cc | 7 +++---- cpp/src/arrow/testing/util.cc | 21 +++++++++++++++---- cpp/src/arrow/testing/util.h | 9 +++++--- 5 files changed, 49 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index e11ddd96fbd..222b5ee88a4 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -34,6 +34,7 @@ #include "arrow/testing/extension_type.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" +#include "arrow/testing/util.h" #include "arrow/type.h" #include "arrow/type_fwd.h" #include "arrow/type_traits.h" @@ -1146,6 +1147,16 @@ constexpr char kTimestampSecondsJson[] = constexpr char kTimestampExtremeJson[] = R"(["1677-09-20T00:00:59.123456", "2262-04-13T23:23:23.999999"])"; +class CastTimezone : public ::testing::Test { + protected: + void SetUp() override { +#ifdef _WIN32 + // Initialize timezone database on Windows + ASSERT_OK(InitTestTimezoneDatabase()); +#endif + } +}; + TEST(Cast, TimestampToDate) { // See scalar_temporal_test.cc auto timestamps = ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampJson); @@ -1181,7 +1192,7 @@ TEST(Cast, TimestampToDate) { } } -TEST(Cast, ZonedTimestampToDate) { +TEST_F(CastTimezone, ZonedTimestampToDate) { { // See TestZoned in scalar_temporal_test.cc auto timestamps = @@ -1372,7 +1383,7 @@ TEST(Cast, TimestampToTime) { } } -TEST(Cast, ZonedTimestampToTime) { +TEST_F(CastTimezone, ZonedTimestampToTime) { CheckCast(ArrayFromJSON(timestamp(TimeUnit::NANO, "Pacific/Marquesas"), kTimestampJson), ArrayFromJSON(time64(TimeUnit::NANO), R"([ 52259123456789, 50003999999999, 56480001001001, 65000000000000, @@ -1563,7 +1574,7 @@ TEST(Cast, TimestampToString) { } } -TEST(Cast, TimestampWithZoneToString) { +TEST_F(CastTimezone, TimestampWithZoneToString) { for (auto string_type : {utf8(), large_utf8()}) { CheckCast( ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), "[-30610224000, -5364662400]"), diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index e657903d50b..5d9690d58ff 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -24,6 +24,7 @@ #include "arrow/compute/kernels/test_util.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" +#include "arrow/testing/util.h" #include "arrow/type.h" #include "arrow/util/checked_cast.h" #include "arrow/util/formatting.h" @@ -407,6 +408,14 @@ class ScalarTemporalTest : public ::testing::Test { RoundTemporalOptions round_to_15_quarters = RoundTemporalOptions(15, CalendarUnit::QUARTER); RoundTemporalOptions round_to_15_years = RoundTemporalOptions(15, CalendarUnit::YEAR); + + protected: + void SetUp() override { +#ifdef _WIN32 + // Initialize timezone database on Windows + ASSERT_OK(InitTestTimezoneDatabase()); +#endif + } }; TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionAllTemporalTypes) { diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index ae62d263a83..8d714461bd1 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -123,10 +123,10 @@ TEST(Misc, SetTimezoneConfig) { #else auto fs = std::make_shared(); - Result tzdata_result = GetTestTimezoneDatabaseRoot(); + util::optional tzdata_result = GetTestTimezoneDatabaseRoot(); std::string tzdata_dir; - if (tzdata_result.ok()) { - tzdata_dir = tzdata_result.ValueUnsafe(); + if (tzdata_result.has_value()) { + tzdata_dir = tzdata_result.value(); } else { auto home_raw = std::getenv("USERPROFILE"); std::string home = home_raw == nullptr ? "~" : std::string(home_raw); @@ -135,7 +135,6 @@ TEST(Misc, SetTimezoneConfig) { ASSERT_OK_AND_ASSIGN(tzdata_dir, fs->NormalizePath(tzdata_dir)); ASSERT_OK_AND_ASSIGN(auto tzdata_path, arrow::internal::PlatformFilename::FromString(tzdata_dir)); - if (!arrow::internal::FileExists(tzdata_path).ValueOr(false)) { GTEST_SKIP() << "Couldn't find timezone database in expected dir: " << tzdata_dir; diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index 84fd68687a5..9fe32db90ee 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -37,6 +37,7 @@ #include // IWYU pragma: keep #endif +#include "arrow/config.h" #include "arrow/table.h" #include "arrow/testing/random.h" #include "arrow/type.h" @@ -108,13 +109,25 @@ Status GetTestResourceRoot(std::string* out) { return Status::OK(); } -Result GetTestTimezoneDatabaseRoot() { +util::optional GetTestTimezoneDatabaseRoot() { const char* c_root = std::getenv("ARROW_TIMEZONE_DATABASE"); if (!c_root) { - return Status::IOError( - "Test resources not found, set ARROW_TIMEZONE_DATABASE"); + return util::make_optional(); } - return std::string(c_root); + return util::make_optional(std::string(c_root)); +} + +// This is only relevant on Windows, since other OSs have compatible databases built-in +Status InitTestTimezoneDatabase() { + auto maybe_tzdata = GetTestTimezoneDatabaseRoot(); + // If missing, timezone database will default to %USERPROFILE%\Downloads\tzdata + if (!maybe_tzdata.has_value()) return Status::OK(); + + auto tzdata_path = std::string(maybe_tzdata.value()); + arrow::GlobalOptions options = {util::make_optional(tzdata_path)}; + std::cout << "timezone db path set to: " << tzdata_path; // TODO: remove + ARROW_RETURN_NOT_OK(arrow::Initialize(options)); + return Status::OK(); } int GetListenPort() { diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h index 39069a92bf3..f2e8101a50b 100644 --- a/cpp/src/arrow/testing/util.h +++ b/cpp/src/arrow/testing/util.h @@ -34,6 +34,7 @@ #include "arrow/testing/visibility.h" #include "arrow/type_fwd.h" #include "arrow/util/macros.h" +#include "arrow/util/optional.h" namespace arrow { @@ -110,9 +111,11 @@ UnionTypeFactories() { // Status ARROW_TESTING_EXPORT Status GetTestResourceRoot(std::string*); -// Return the value of the ARROW_TIMEZONE_DATABASE environment variable or return error -// Status -ARROW_TESTING_EXPORT Result GetTestTimezoneDatabaseRoot(); +// Return the value of the ARROW_TIMEZONE_DATABASE environment variable +ARROW_TESTING_EXPORT util::optional GetTestTimezoneDatabaseRoot(); + +// +ARROW_TESTING_EXPORT Status InitTestTimezoneDatabase(); // Get a TCP port number to listen on. This is a different number every time, // as reusing the same port across tests can produce spurious bind errors on From c78d3cd3f3eff3a40f951c33c93b84fef45945a0 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 24 Mar 2022 14:16:25 -0700 Subject: [PATCH 31/35] Document environment variable --- docs/source/developers/cpp/windows.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/developers/cpp/windows.rst b/docs/source/developers/cpp/windows.rst index b6111079c3b..ad3f47749bd 100644 --- a/docs/source/developers/cpp/windows.rst +++ b/docs/source/developers/cpp/windows.rst @@ -367,7 +367,9 @@ Downloading the Timezone Database To run some of the compute unit tests on Windows, the IANA timezone database and the Windows timezone mapping need to be downloaded first. See -:ref:`download-timezone-database`. +:ref:`download-timezone-database` for download instructions. To set a non-default +path for the timezone database while running the unit tests, set the +``ARROW_TIMEZONE_DATABASE`` environment variable. Replicating Appveyor Builds =========================== From fe6469d11e4fec5bf1d579bfea1ad39883e1a850 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 24 Mar 2022 14:30:50 -0700 Subject: [PATCH 32/35] Cleanup --- cpp/src/arrow/public_api_test.cc | 10 ---------- cpp/src/arrow/testing/util.cc | 2 -- cpp/src/arrow/testing/util.h | 3 ++- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index 8d714461bd1..45f3313c67f 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -105,16 +105,6 @@ TEST(Misc, BuildInfo) { ASSERT_THAT(info.full_so_version, ::testing::HasSubstr(info.so_version)); } -// class ConfigTest : public ::testing::Test { -// protected: -// void TearDown() override { -// // Reset global options to defaults -// arrow::GlobalOptions default_options; -// default_options.timezone_db_path = util::optional(); -// ASSERT_OK(arrow::Initialize(default_options)); -// } -// }; - TEST(Misc, SetTimezoneConfig) { #ifndef _WIN32 GTEST_SKIP() << "Can only set the Timezone database on Windows"; diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index 9fe32db90ee..764f8d976d2 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -117,7 +117,6 @@ util::optional GetTestTimezoneDatabaseRoot() { return util::make_optional(std::string(c_root)); } -// This is only relevant on Windows, since other OSs have compatible databases built-in Status InitTestTimezoneDatabase() { auto maybe_tzdata = GetTestTimezoneDatabaseRoot(); // If missing, timezone database will default to %USERPROFILE%\Downloads\tzdata @@ -125,7 +124,6 @@ Status InitTestTimezoneDatabase() { auto tzdata_path = std::string(maybe_tzdata.value()); arrow::GlobalOptions options = {util::make_optional(tzdata_path)}; - std::cout << "timezone db path set to: " << tzdata_path; // TODO: remove ARROW_RETURN_NOT_OK(arrow::Initialize(options)); return Status::OK(); } diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h index f2e8101a50b..b993d86ed63 100644 --- a/cpp/src/arrow/testing/util.h +++ b/cpp/src/arrow/testing/util.h @@ -114,7 +114,8 @@ ARROW_TESTING_EXPORT Status GetTestResourceRoot(std::string*); // Return the value of the ARROW_TIMEZONE_DATABASE environment variable ARROW_TESTING_EXPORT util::optional GetTestTimezoneDatabaseRoot(); -// +// Set the Timezone database based on the ARROW_TIMEZONE_DATABASE env variable +// This is only relevant on Windows, since other OSs have compatible databases built-in ARROW_TESTING_EXPORT Status InitTestTimezoneDatabase(); // Get a TCP port number to listen on. This is a different number every time, From 9f9701657db8dde378c92d35d4942449e2c35ba4 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 24 Mar 2022 19:40:14 -0700 Subject: [PATCH 33/35] Apply suggestions from code review Co-authored-by: Jonathan Keane --- cpp/src/arrow/config.cc | 4 ++-- r/src/config.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index 5622fba59c1..a93a8feae1d 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -97,8 +97,8 @@ Status Initialize(const GlobalOptions& options) noexcept { timezone_db_path = options.timezone_db_path.value(); #else return Status::Invalid( - "Arrow was set to use OS timezone database at compile time, so a downloaded database " - "cannot be provided at runtime."); + "Arrow was set to use OS timezone database at compile time, " + "so a downloaded database cannot be provided at runtime."); #endif // !USE_OS_TZDB } return Status::OK(); diff --git a/r/src/config.cpp b/r/src/config.cpp index b6c66de7408..763ddee2cc1 100644 --- a/r/src/config.cpp +++ b/r/src/config.cpp @@ -42,7 +42,7 @@ void set_timezone_database(cpp11::strings path) { cpp11::stop("Must provide a single path to the timezone database."); } - arrow::ArrowGlobalOptions options; + arrow::GlobalOptions options; options.timezone_db_path = arrow::util::make_optional(paths[0]); arrow::StopIfNotOk(arrow::Initialize(options)); } From ad161ae07e65bf1528a2adf2259e0fd54df9aa8e Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 25 Mar 2022 08:12:28 -0700 Subject: [PATCH 34/35] Fix type that led to empty string --- cpp/src/arrow/testing/util.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index 764f8d976d2..16da96452e0 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -112,7 +112,7 @@ Status GetTestResourceRoot(std::string* out) { util::optional GetTestTimezoneDatabaseRoot() { const char* c_root = std::getenv("ARROW_TIMEZONE_DATABASE"); if (!c_root) { - return util::make_optional(); + return util::optional(); } return util::make_optional(std::string(c_root)); } From 62e81780dda234106fba5a3148f0d19045d66898 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 25 Mar 2022 13:58:33 -0700 Subject: [PATCH 35/35] Update r/R/arrow-package.R Co-authored-by: Davis Vaughan --- r/R/arrow-package.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index f392c26a389..5ab82d88a88 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -71,7 +71,7 @@ tzdb::tzdb_initialize() set_timezone_database(tzdb::tzdb_path("text")) } else { - warning("tzdb not installed. Timezones will not be available.") + warning("The tzdb package is not installed. Timezones will not be available.") } }