From 2ac1a881eecebc03c4bff95585b03a6524cbf5df Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 19 Dec 2025 23:49:22 +0100 Subject: [PATCH 01/29] first draft --- .../compute/kernels/scalar_temporal_binary.cc | 43 +- .../compute/kernels/scalar_temporal_unary.cc | 74 ++-- .../arrow/compute/kernels/temporal_internal.h | 31 +- cpp/src/arrow/util/chrono_internal.h | 385 ++++++++++++++++++ cpp/src/arrow/util/date_internal.h | 39 +- 5 files changed, 485 insertions(+), 87 deletions(-) create mode 100644 cpp/src/arrow/util/chrono_internal.h diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc index 4437b8fe1db..920d1ec0105 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc @@ -27,7 +27,6 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/logging_internal.h" #include "arrow/util/time.h" -#include "arrow/vendored/datetime.h" namespace arrow { @@ -37,28 +36,30 @@ using internal::checked_pointer_cast; namespace compute { namespace internal { +namespace chrono = arrow::internal::chrono; + namespace { -using arrow_vendored::date::days; -using arrow_vendored::date::floor; -using arrow_vendored::date::hh_mm_ss; -using arrow_vendored::date::local_days; -using arrow_vendored::date::local_time; -using arrow_vendored::date::sys_days; -using arrow_vendored::date::sys_time; -using arrow_vendored::date::trunc; -using arrow_vendored::date::weekday; -using arrow_vendored::date::weeks; -using arrow_vendored::date::year_month_day; -using arrow_vendored::date::year_month_weekday; -using arrow_vendored::date::years; -using arrow_vendored::date::literals::dec; -using arrow_vendored::date::literals::jan; -using arrow_vendored::date::literals::last; -using arrow_vendored::date::literals::mon; -using arrow_vendored::date::literals::sun; -using arrow_vendored::date::literals::thu; -using arrow_vendored::date::literals::wed; +using 
chrono::days; +using chrono::floor; +using chrono::hh_mm_ss; +using chrono::local_days; +using chrono::local_time; +using chrono::sys_days; +using chrono::sys_time; +using chrono::trunc; +using chrono::weekday; +using chrono::weeks; +using chrono::year_month_day; +using chrono::year_month_weekday; +using chrono::years; +using chrono::literals::dec; +using chrono::literals::jan; +using chrono::literals::last; +using chrono::literals::mon; +using chrono::literals::sun; +using chrono::literals::thu; +using chrono::literals::wed; using internal::applicator::ScalarBinaryNotNullStatefulEqualTypes; using DayOfWeekState = OptionsWrapper; diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index 8c7bdceb228..8df00b6b04e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -29,7 +29,6 @@ #include "arrow/util/logging_internal.h" #include "arrow/util/time.h" #include "arrow/util/value_parsing.h" -#include "arrow/vendored/datetime.h" namespace arrow { @@ -38,34 +37,36 @@ using internal::checked_pointer_cast; namespace compute::internal { +namespace chrono = arrow::internal::chrono; + namespace { -using arrow_vendored::date::ceil; -using arrow_vendored::date::days; -using arrow_vendored::date::floor; -using arrow_vendored::date::hh_mm_ss; -using arrow_vendored::date::local_days; -using arrow_vendored::date::local_time; -using arrow_vendored::date::locate_zone; -using arrow_vendored::date::Monday; -using arrow_vendored::date::months; -using arrow_vendored::date::round; -using arrow_vendored::date::Sunday; -using arrow_vendored::date::sys_time; -using arrow_vendored::date::trunc; -using arrow_vendored::date::weekday; -using arrow_vendored::date::weeks; -using arrow_vendored::date::year; -using arrow_vendored::date::year_month_day; -using arrow_vendored::date::year_month_weekday; -using arrow_vendored::date::years; -using 
arrow_vendored::date::literals::dec; -using arrow_vendored::date::literals::jan; -using arrow_vendored::date::literals::last; -using arrow_vendored::date::literals::mon; -using arrow_vendored::date::literals::sun; -using arrow_vendored::date::literals::thu; -using arrow_vendored::date::literals::wed; +using chrono::ceil; +using chrono::days; +using chrono::floor; +using chrono::hh_mm_ss; +using chrono::local_days; +using chrono::local_time; +using chrono::locate_zone; +using chrono::Monday; +using chrono::months; +using chrono::round; +using chrono::Sunday; +using chrono::sys_time; +using chrono::trunc; +using chrono::weekday; +using chrono::weeks; +using chrono::year; +using chrono::year_month_day; +using chrono::year_month_weekday; +using chrono::years; +using chrono::literals::dec; +using chrono::literals::jan; +using chrono::literals::last; +using chrono::literals::mon; +using chrono::literals::sun; +using chrono::literals::thu; +using chrono::literals::wed; using std::chrono::duration_cast; using std::chrono::hours; using std::chrono::minutes; @@ -525,8 +526,8 @@ struct Week { } Localizer localizer_; - arrow_vendored::date::weekday wd_; - arrow_vendored::date::days days_offset_; + chrono::weekday wd_; + chrono::days days_offset_; const bool count_from_zero_; const bool first_week_is_fully_in_year_; }; @@ -1379,7 +1380,7 @@ struct AssumeTimezone { T Call(KernelContext*, Arg0 arg, Status* st) const { try { return get_local_time(arg, &tz_); - } catch (const arrow_vendored::date::nonexistent_local_time& e) { + } catch (const chrono::nonexistent_local_time& e) { switch (options.nonexistent) { case AssumeTimezoneOptions::Nonexistent::NONEXISTENT_RAISE: { *st = Status::Invalid("Timestamp doesn't exist in timezone '", options.timezone, @@ -1387,15 +1388,13 @@ struct AssumeTimezone { return arg; } case AssumeTimezoneOptions::Nonexistent::NONEXISTENT_EARLIEST: { - return get_local_time(arg, arrow_vendored::date::choose::latest, - &tz_) - - 1; + return 
get_local_time(arg, chrono::choose::latest, &tz_) - 1; } case AssumeTimezoneOptions::Nonexistent::NONEXISTENT_LATEST: { - return get_local_time(arg, arrow_vendored::date::choose::latest, &tz_); + return get_local_time(arg, chrono::choose::latest, &tz_); } } - } catch (const arrow_vendored::date::ambiguous_local_time& e) { + } catch (const chrono::ambiguous_local_time& e) { switch (options.ambiguous) { case AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_RAISE: { *st = Status::Invalid("Timestamp is ambiguous in timezone '", options.timezone, @@ -1403,11 +1402,10 @@ struct AssumeTimezone { return arg; } case AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_EARLIEST: { - return get_local_time(arg, arrow_vendored::date::choose::earliest, - &tz_); + return get_local_time(arg, chrono::choose::earliest, &tz_); } case AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_LATEST: { - return get_local_time(arg, arrow_vendored::date::choose::latest, &tz_); + return get_local_time(arg, chrono::choose::latest, &tz_); } } } diff --git a/cpp/src/arrow/compute/kernels/temporal_internal.h b/cpp/src/arrow/compute/kernels/temporal_internal.h index 139cc134bde..af06daff10c 100644 --- a/cpp/src/arrow/compute/kernels/temporal_internal.h +++ b/cpp/src/arrow/compute/kernels/temporal_internal.h @@ -26,19 +26,22 @@ #include "arrow/util/value_parsing.h" namespace arrow::compute::internal { + +namespace chrono = arrow::internal::chrono; + using arrow::internal::checked_cast; using arrow::internal::OffsetZone; -using arrow_vendored::date::choose; -using arrow_vendored::date::days; -using arrow_vendored::date::floor; -using arrow_vendored::date::local_days; -using arrow_vendored::date::local_time; -using arrow_vendored::date::locate_zone; -using arrow_vendored::date::sys_days; -using arrow_vendored::date::sys_time; -using arrow_vendored::date::time_zone; -using arrow_vendored::date::year_month_day; -using arrow_vendored::date::zoned_time; +using chrono::choose; +using chrono::days; +using chrono::floor; +using 
chrono::local_days; +using chrono::local_time; +using chrono::locate_zone; +using chrono::sys_days; +using chrono::sys_time; +using chrono::time_zone; +using chrono::year_month_day; +using chrono::zoned_time; using std::chrono::duration_cast; // https://howardhinnant.github.io/date/tz.html#Examples @@ -148,10 +151,10 @@ struct ZonedLocalizer { try { return ApplyTimeZone(tz_, lt, std::nullopt, local_to_sys_time); - } catch (const arrow_vendored::date::nonexistent_local_time& e) { + } catch (const chrono::nonexistent_local_time& e) { *st = Status::Invalid("Local time does not exist: ", e.what()); return Duration{0}; - } catch (const arrow_vendored::date::ambiguous_local_time& e) { + } catch (const chrono::ambiguous_local_time& e) { *st = Status::Invalid("Local time is ambiguous: ", e.what()); return Duration{0}; } @@ -179,7 +182,7 @@ struct TimestampFormatter { const auto timepoint = sys_time(Duration{arg}); auto format_zoned_time = [&](auto&& zt) { try { - arrow_vendored::date::to_stream(bufstream, format, zt); + chrono::to_stream(bufstream, format, zt); return Status::OK(); } catch (const std::runtime_error& ex) { bufstream.clear(); diff --git a/cpp/src/arrow/util/chrono_internal.h b/cpp/src/arrow/util/chrono_internal.h new file mode 100644 index 00000000000..2eef96f8adb --- /dev/null +++ b/cpp/src/arrow/util/chrono_internal.h @@ -0,0 +1,385 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +/// \file chrono_internal.h +/// \brief Abstraction layer for C++20 chrono calendar/timezone APIs +/// +/// This header provides a unified interface for chrono calendar and timezone +/// functionality. On compilers with full C++20 chrono support (MSVC 16.10+ and +/// GCC 14+), it uses std::chrono. On other compilers, it falls back to the +/// vendored Howard Hinnant date library. +/// +/// The main benefit is on Windows where std::chrono uses the system timezone +/// database, eliminating the need for users to install IANA tzdata separately. + +#include +#include +#include +#include +#include +#include + +// Feature detection for C++20 chrono timezone support +// We only enable for compilers with FULL support (not partial) +// +// Compiler support +// (https://en.cppreference.com/w/cpp/compiler_support/20.html#cpp_lib_chrono_201907L): +// - MSVC 19.29 (VS 2019 16.10)+: Full support, uses Windows TZ database +// - GCC 14+: Full support, requires tzdata.zi on system +// - GCC 11-13: Partial support only +// - Clang/libc++: Still partial even in version 19 +// - Apple Clang: Still partial + +#if defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907L +# if defined(_MSC_VER) +// MSVC 19.29+: Full support, uses Windows internal TZ database +# define ARROW_USE_STD_CHRONO 1 +# elif defined(__GLIBCXX__) && __GNUC__ >= 14 +// GCC 14+ with libstdc++: Full support, requires tzdata.zi +# define ARROW_USE_STD_CHRONO 1 +# endif +#endif + +#ifndef ARROW_USE_STD_CHRONO +# define ARROW_USE_STD_CHRONO 0 +#endif + +#if ARROW_USE_STD_CHRONO +// Use 
C++20 standard library chrono +#else +// Use vendored Howard Hinnant date library +# include "arrow/vendored/datetime.h" +#endif + +namespace arrow::internal::chrono { + +#if ARROW_USE_STD_CHRONO + +// ============================================================================ +// C++20 std::chrono backend +// ============================================================================ + +// Duration types +using days = std::chrono::days; +using weeks = std::chrono::weeks; +using months = std::chrono::months; +using years = std::chrono::years; + +// Time point types +template +using sys_time = std::chrono::sys_time; +using sys_days = std::chrono::sys_days; +using sys_seconds = std::chrono::sys_seconds; + +template +using local_time = std::chrono::local_time; +using local_days = std::chrono::local_days; +using local_seconds = std::chrono::local_seconds; + +// Calendar types +using year = std::chrono::year; +using month = std::chrono::month; +using day = std::chrono::day; +using weekday = std::chrono::weekday; +using year_month_day = std::chrono::year_month_day; +using year_month_weekday = std::chrono::year_month_weekday; + +template +using hh_mm_ss = std::chrono::hh_mm_ss; + +// Timezone types +using time_zone = std::chrono::time_zone; +using sys_info = std::chrono::sys_info; +using local_info = std::chrono::local_info; +using choose = std::chrono::choose; + +template +using zoned_time = std::chrono::zoned_time; + +template +using zoned_traits = std::chrono::zoned_traits; + +// Exceptions +using nonexistent_local_time = std::chrono::nonexistent_local_time; +using ambiguous_local_time = std::chrono::ambiguous_local_time; + +// Weekday constants +inline constexpr std::chrono::weekday Monday{1}; +inline constexpr std::chrono::weekday Sunday{0}; + +// Rounding functions +using std::chrono::ceil; +using std::chrono::floor; +using std::chrono::round; + +// trunc is not in std::chrono - implement proper truncation toward zero +// floor rounds toward negative infinity, but 
trunc rounds toward zero +template +constexpr ToDuration trunc(const std::chrono::duration& d) { + auto floor_result = std::chrono::floor(d); + auto remainder = d - floor_result; + // If original was negative and there's a non-zero remainder, + // floor went too far negative, so add one unit back + if (d.count() < 0 && remainder.count() != 0) { + return floor_result + ToDuration{1}; + } + return floor_result; +} + +// Timezone lookup +inline const time_zone* locate_zone(std::string_view tz_name) { + return std::chrono::locate_zone(tz_name); +} + +inline const time_zone* current_zone() { return std::chrono::current_zone(); } + +// Helper to get subsecond decimal places based on duration period +template +constexpr int get_subsecond_decimals() { + using Period = typename Duration::period; + if constexpr (Period::den == 1000) + return 3; // milliseconds + else if constexpr (Period::den == 1000000) + return 6; // microseconds + else if constexpr (Period::den == 1000000000) + return 9; // nanoseconds + else + return 0; // seconds or coarser +} + +// Formatting support with subsecond precision and timezone handling +// Mimics the vendored date library's to_stream behavior for compatibility +template +std::basic_ostream& to_stream( + std::basic_ostream& os, const CharT* fmt, + const std::chrono::zoned_time& zt) { + // Get local time and timezone info + auto lt = zt.get_local_time(); + auto info = zt.get_info(); + + auto lt_days = std::chrono::floor(lt); + auto ymd = year_month_day{lt_days}; + + // Calculate time of day components + auto time_since_midnight = lt - local_time{lt_days}; + auto total_secs = std::chrono::duration_cast(time_since_midnight); + auto h = std::chrono::duration_cast(time_since_midnight); + auto m = std::chrono::duration_cast(time_since_midnight - h); + auto s = std::chrono::duration_cast(time_since_midnight - h - m); + + // Build std::tm for strftime + std::tm tm{}; + tm.tm_sec = static_cast(s.count()); + tm.tm_min = static_cast(m.count()); + 
tm.tm_hour = static_cast(h.count()); + tm.tm_mday = static_cast(static_cast(ymd.day())); + tm.tm_mon = static_cast(static_cast(ymd.month())) - 1; + tm.tm_year = static_cast(ymd.year()) - 1900; + + auto wd = weekday{lt_days}; + tm.tm_wday = static_cast(wd.c_encoding()); + + auto year_start = + std::chrono::local_days{ymd.year() / std::chrono::January / std::chrono::day{1}}; + tm.tm_yday = static_cast((lt_days - year_start).count()); + tm.tm_isdst = info.save != std::chrono::minutes{0} ? 1 : 0; + + // Timezone offset calculation + auto offset_mins = std::chrono::duration_cast(info.offset); + bool neg_offset = offset_mins.count() < 0; + auto abs_offset = neg_offset ? -offset_mins : offset_mins; + auto off_h = std::chrono::duration_cast(abs_offset); + auto off_m = abs_offset - off_h; + + // Calculate subsecond value + constexpr int decimals = get_subsecond_decimals(); + int64_t subsec_value = 0; + if constexpr (decimals > 0) { + auto subsec_duration = time_since_midnight - total_secs; + subsec_value = std::chrono::duration_cast(subsec_duration).count(); + if (subsec_value < 0) subsec_value = -subsec_value; + } + + // Parse format string, handle %S, %z, %Z specially + std::string result; + for (const CharT* p = fmt; *p; ++p) { + if (*p == '%' && *(p + 1)) { + CharT spec = *(p + 1); + if (spec == 'S') { + // %S with subsecond precision + result += (tm.tm_sec < 10 ? "0" : "") + std::to_string(tm.tm_sec); + if constexpr (decimals > 0) { + std::ostringstream ss; + ss << '.' << std::setfill('0') << std::setw(decimals) << subsec_value; + result += ss.str(); + } + ++p; + } else if (spec == 'z') { + // %z timezone offset + std::ostringstream ss; + ss << (neg_offset ? 
'-' : '+') << std::setfill('0') << std::setw(2) + << off_h.count() << std::setfill('0') << std::setw(2) << off_m.count(); + result += ss.str(); + ++p; + } else if (spec == 'Z') { + // %Z timezone abbreviation + result += info.abbrev; + ++p; + } else { + // Use strftime for other specifiers + char buf[64]; + char small_fmt[3] = {'%', static_cast(spec), '\0'}; + if (std::strftime(buf, sizeof(buf), small_fmt, &tm) > 0) { + result += buf; + } + ++p; + } + } else { + result += static_cast(*p); + } + } + + return os << result; +} + +template +std::string format(const char* fmt, const Duration& d) { + // Renders |d| as zero-padded HHMM; only a negative value gets a sign ('-'), since + // callers such as OffsetZone::get_info prepend '+'/'-' themselves. |fmt| is unused. + std::ostringstream ss; + auto total_minutes = std::chrono::duration_cast(d).count(); + const bool negative = total_minutes < 0; + if (negative) total_minutes = -total_minutes; + if (negative) ss << '-'; + ss << std::setfill('0') << std::setw(2) << total_minutes / 60; + ss << std::setfill('0') << std::setw(2) << total_minutes % 60; + return ss.str(); +} + +// Literals namespace +namespace literals { +// Month literals +inline constexpr std::chrono::month jan = std::chrono::January; +inline constexpr std::chrono::month dec = std::chrono::December; + +// Weekday literals +inline constexpr std::chrono::weekday sun = std::chrono::Sunday; +inline constexpr std::chrono::weekday mon = std::chrono::Monday; +inline constexpr std::chrono::weekday wed = std::chrono::Wednesday; +inline constexpr std::chrono::weekday thu = std::chrono::Thursday; + +// last specifier +inline constexpr std::chrono::last_spec last = std::chrono::last; +} // namespace literals + +#else // !ARROW_USE_STD_CHRONO + +// ============================================================================ +// Vendored Howard Hinnant date library backend +// ============================================================================ + +namespace vendored = arrow_vendored::date; + +// Duration types +using days = vendored::days; +using weeks = vendored::weeks; +using months = 
vendored::months; +using years = vendored::years; + +// Time point types +template +using sys_time = vendored::sys_time; +using sys_days = vendored::sys_days; +using sys_seconds = vendored::sys_seconds; + +template +using local_time = vendored::local_time; +using local_days = vendored::local_days; +using local_seconds = vendored::local_seconds; + +// Calendar types +using year = vendored::year; +using month = vendored::month; +using day = vendored::day; +using weekday = vendored::weekday; +using year_month_day = vendored::year_month_day; +using year_month_weekday = vendored::year_month_weekday; + +template +using hh_mm_ss = vendored::hh_mm_ss; + +// Timezone types +using time_zone = vendored::time_zone; +using sys_info = vendored::sys_info; +using local_info = vendored::local_info; +using choose = vendored::choose; + +template +using zoned_time = vendored::zoned_time; + +template +using zoned_traits = vendored::zoned_traits; + +// Exceptions +using nonexistent_local_time = vendored::nonexistent_local_time; +using ambiguous_local_time = vendored::ambiguous_local_time; + +// Weekday constants +inline constexpr vendored::weekday Monday = vendored::Monday; +inline constexpr vendored::weekday Sunday = vendored::Sunday; + +// Rounding functions +using vendored::ceil; +using vendored::floor; +using vendored::round; +using vendored::trunc; + +// Timezone lookup +inline const time_zone* locate_zone(std::string_view tz_name) { + return vendored::locate_zone(std::string(tz_name)); +} + +inline const time_zone* current_zone() { return vendored::current_zone(); } + +// Formatting support +using vendored::format; + +template +std::basic_ostream& to_stream( + std::basic_ostream& os, const CharT* fmt, + const vendored::zoned_time& zt) { + return vendored::to_stream(os, fmt, zt); +} + +// Literals namespace +namespace literals { +inline constexpr vendored::month jan = vendored::jan; +inline constexpr vendored::month dec = vendored::dec; + +inline constexpr vendored::weekday sun = 
vendored::sun; +inline constexpr vendored::weekday mon = vendored::mon; +inline constexpr vendored::weekday wed = vendored::wed; +inline constexpr vendored::weekday thu = vendored::thu; + +inline constexpr vendored::last_spec last = vendored::last; +} // namespace literals + +#endif // ARROW_USE_STD_CHRONO + +} // namespace arrow::internal::chrono diff --git a/cpp/src/arrow/util/date_internal.h b/cpp/src/arrow/util/date_internal.h index 32f1cae966e..1e280627f15 100644 --- a/cpp/src/arrow/util/date_internal.h +++ b/cpp/src/arrow/util/date_internal.h @@ -17,12 +17,10 @@ #pragma once -#include "arrow/vendored/datetime.h" +#include "arrow/util/chrono_internal.h" namespace arrow::internal { -namespace date = arrow_vendored::date; - // OffsetZone object is inspired by an example from date.h documentation: // https://howardhinnant.github.io/date/tz.html#Examples @@ -33,23 +31,23 @@ class OffsetZone { explicit OffsetZone(std::chrono::minutes offset) : offset_{offset} {} template - date::local_time to_local(date::sys_time tp) const { - return date::local_time{(tp + offset_).time_since_epoch()}; + chrono::local_time to_local(chrono::sys_time tp) const { + return chrono::local_time{(tp + offset_).time_since_epoch()}; } template - date::sys_time to_sys( - date::local_time tp, - [[maybe_unused]] date::choose = date::choose::earliest) const { - return date::sys_time{(tp - offset_).time_since_epoch()}; + chrono::sys_time to_sys( + chrono::local_time tp, + [[maybe_unused]] chrono::choose = chrono::choose::earliest) const { + return chrono::sys_time{(tp - offset_).time_since_epoch()}; } template - date::sys_info get_info(date::sys_time st) const { - return {date::sys_seconds::min(), date::sys_seconds::max(), offset_, + chrono::sys_info get_info(chrono::sys_time st) const { + return {chrono::sys_seconds::min(), chrono::sys_seconds::max(), offset_, std::chrono::minutes(0), - offset_ >= std::chrono::minutes(0) ? 
"+" + date::format("%H%M", offset_) - : "-" + date::format("%H%M", -offset_)}; + offset_ >= std::chrono::minutes(0) ? "+" + chrono::format("%H%M", offset_) + : "-" + chrono::format("%H%M", -offset_)}; } const OffsetZone* operator->() const { return this; } @@ -57,7 +55,15 @@ class OffsetZone { } // namespace arrow::internal +// zoned_traits specialization for OffsetZone +// This needs to be in the correct namespace depending on the backend + +#if ARROW_USE_STD_CHRONO +namespace std::chrono { +#else namespace arrow_vendored::date { +#endif + using arrow::internal::OffsetZone; template <> @@ -68,4 +74,9 @@ struct zoned_traits { throw std::runtime_error{"OffsetZone can't parse " + name}; } }; -} // namespace arrow_vendored::date + +#if ARROW_USE_STD_CHRONO +} // namespace std::chrono +#else +} // namespace arrow_vendored::date // NOLINT(readability/namespace) +#endif From c68a90d9bb9aceaee0239de99d54acc120fced95 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 23 Dec 2025 17:05:39 +0100 Subject: [PATCH 02/29] attempt to understand gcc behavior --- .../compute/kernels/scalar_temporal_test.cc | 77 ++++++++++++------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 3350fb805c4..218fb79c363 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -20,6 +20,7 @@ #include #include "arrow/compute/api_scalar.h" +#include "arrow/util/chrono_internal.h" // for ARROW_USE_STD_CHRONO #include "arrow/compute/cast.h" #include "arrow/compute/kernels/test_util_internal.h" #include "arrow/testing/gtest_util.h" @@ -869,7 +870,13 @@ TEST_F(ScalarTemporalTest, TestZoned2) { {"iso_year": 2009, "iso_week": 1, "iso_day_of_week": 1}, {"iso_year": 2011, "iso_week": 52, "iso_day_of_week": 7}, null])"); auto quarter = "[1, 1, 1, 2, 1, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 1, null]"; - auto hour = 
"[9, 9, 9, 13, 11, 12, 13, 14, 15, 17, 18, 19, 20, 10, 10, 11, null]"; + // Note: GCC behaves differently for Australia/Broken_Hill around the year 2000 zone + // rule transition. The expected hour for 2000-02-29 (index 1) differs because the + // offset is wrong (+9:30 instead of +10:30). + std::string hour = "[9, 9, 9, 13, 11, 12, 13, 14, 15, 17, 18, 19, 20, 10, 10, 11, null]"; +#if ARROW_USE_STD_CHRONO + hour.replace(hour.find("[9, 9, "), 6, "[9, 8, "); +#endif auto minute = "[30, 53, 59, 3, 35, 40, 45, 50, 55, 0, 5, 10, 15, 30, 30, 32, null]"; CheckScalarUnary("year", unit, times_seconds_precision, int64(), year); @@ -890,7 +897,7 @@ TEST_F(ScalarTemporalTest, TestZoned2) { CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times_seconds_precision), iso_calendar); CheckScalarUnary("quarter", unit, times_seconds_precision, int64(), quarter); - CheckScalarUnary("hour", unit, times_seconds_precision, int64(), hour); + CheckScalarUnary("hour", unit, times_seconds_precision, int64(), hour.c_str()); CheckScalarUnary("minute", unit, times_seconds_precision, int64(), minute); CheckScalarUnary("second", unit, times_seconds_precision, int64(), second); CheckScalarUnary("millisecond", unit, times_seconds_precision, int64(), zeros); @@ -2817,26 +2824,32 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, CeilZoned) { "2020-01-01 01:09:00", "2019-12-31 02:22:00", "2019-12-30 03:22:00", "2009-12-31 04:22:00", "2010-01-01 05:35:00", "2010-01-03 06:43:00", "2010-01-04 07:43:00", "2006-01-01 08:43:00", "2005-12-31 09:56:00", "2008-12-28 00:09:00", "2008-12-29 00:09:00", "2012-01-01 01:09:00", null])"; - const char* ceil_15_hour = R"([ + std::string ceil_15_hour = R"([ "1970-01-01 05:30:00", "2000-03-01 04:30:00", "1899-01-01 06:00:00", "2033-05-18 05:30:00", "2020-01-01 04:30:00", "2019-12-31 04:30:00", "2019-12-30 04:30:00", "2009-12-31 04:30:00", "2010-01-01 19:30:00", "2010-01-03 19:30:00", "2010-01-04 19:30:00", "2006-01-01 19:30:00", "2005-12-31 19:30:00", "2008-12-28 
04:30:00", "2008-12-29 04:30:00", "2012-01-01 04:30:00", null])"; - const char* ceil_15_day = R"([ + std::string ceil_15_day = R"([ "1970-01-15 14:30:00", "2000-03-15 13:30:00", "1899-01-15 15:00:00", "2033-05-30 14:30:00", "2020-01-15 13:30:00", "2020-01-14 13:30:00", "2019-12-30 13:30:00", "2010-01-14 13:30:00", "2010-01-15 13:30:00", "2010-01-15 13:30:00", "2010-01-15 13:30:00", "2006-01-15 13:30:00", "2006-01-14 13:30:00", "2008-12-30 13:30:00", "2008-12-30 13:30:00", "2012-01-15 13:30:00", null])"; - const char* ceil_3_weeks = R"([ + std::string ceil_3_weeks = R"([ "1970-01-18 14:30:00", "2000-03-05 13:30:00", "1899-01-22 15:00:00", "2033-05-29 14:30:00", "2020-01-19 13:30:00", "2020-01-19 13:30:00", "2020-01-19 13:30:00", "2010-01-24 13:30:00", "2010-01-24 13:30:00", "2010-01-24 13:30:00", "2010-01-24 13:30:00", "2006-01-22 13:30:00", "2006-01-22 13:30:00", "2009-01-11 13:30:00", "2009-01-18 13:30:00", "2012-01-22 13:30:00", null])"; - const char* ceil_3_weeks_sunday = R"([ + std::string ceil_3_weeks_sunday = R"([ "1970-01-24 14:30:00", "2000-03-25 13:30:00", "1899-01-21 15:00:00", "2033-05-28 14:30:00", "2020-01-18 13:30:00", "2020-01-18 13:30:00", "2020-01-18 13:30:00", "2010-01-23 13:30:00", "2010-01-23 13:30:00", "2010-01-23 13:30:00", "2010-01-23 13:30:00", "2006-01-21 13:30:00", "2006-01-21 13:30:00", "2009-01-24 13:30:00", "2009-01-24 13:30:00", "2012-01-21 13:30:00", null])"; +#if ARROW_USE_STD_CHRONO + ceil_15_hour.replace(ceil_15_hour.find("2000-03-01 04:30:00"), 19, "2000-03-01 05:30:00"); + ceil_15_day.replace(ceil_15_day.find("2000-03-15 13:30:00"), 19, "2000-03-15 14:30:00"); + ceil_3_weeks.replace(ceil_3_weeks.find("2000-03-05 13:30:00"), 19, "2000-03-05 14:30:00"); + ceil_3_weeks_sunday.replace(ceil_3_weeks_sunday.find("2000-03-25 13:30:00"), 19, "2000-03-25 14:30:00"); +#endif const char* ceil_5_months = R"([ "1970-05-31 14:30:00", "2000-05-31 14:30:00", "1899-05-31 14:30:00", "2033-05-31 14:30:00", "2020-05-31 14:30:00", "2020-03-31 
13:30:00", "2020-03-31 13:30:00", "2010-03-31 13:30:00", @@ -2861,10 +2874,10 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, CeilZoned) { CheckScalarUnary(op, unit, times, unit, ceil_15_millisecond, &round_to_15_milliseconds); CheckScalarUnary(op, unit, times, unit, ceil_13_second, &round_to_13_seconds); CheckScalarUnary(op, unit, times, unit, ceil_13_minute, &round_to_13_minutes); - CheckScalarUnary(op, unit, times, unit, ceil_15_hour, &round_to_15_hours); - CheckScalarUnary(op, unit, times, unit, ceil_15_day, &round_to_15_days); - CheckScalarUnary(op, unit, times, unit, ceil_3_weeks, &round_to_3_weeks); - CheckScalarUnary(op, unit, times, unit, ceil_3_weeks_sunday, &round_to_3_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, ceil_15_hour.c_str(), &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, ceil_15_day.c_str(), &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, ceil_3_weeks.c_str(), &round_to_3_weeks); + CheckScalarUnary(op, unit, times, unit, ceil_3_weeks_sunday.c_str(), &round_to_3_weeks_sunday); CheckScalarUnary(op, unit, times, unit, ceil_5_months, &round_to_5_months); CheckScalarUnary(op, unit, times, unit, ceil_3_quarters, &round_to_3_quarters); CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years); @@ -3207,26 +3220,32 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, FloorZoned) { "2020-01-01 00:56:00", "2019-12-31 02:09:00", "2019-12-30 03:09:00", "2009-12-31 04:09:00", "2010-01-01 05:22:00", "2010-01-03 06:30:00", "2010-01-04 07:30:00", "2006-01-01 08:30:00", "2005-12-31 09:43:00", "2008-12-27 23:56:00", "2008-12-28 23:56:00", "2012-01-01 00:56:00", null])"; - const char* floor_15_hour = R"([ + std::string floor_15_hour = R"([ "1969-12-31 14:30:00", "2000-02-29 13:30:00", "1898-12-31 15:00:00", "2033-05-17 14:30:00", "2019-12-31 13:30:00", "2019-12-30 13:30:00", "2019-12-29 13:30:00", "2009-12-30 13:30:00", "2010-01-01 04:30:00", "2010-01-03 04:30:00", "2010-01-04 04:30:00", 
"2006-01-01 04:30:00", "2005-12-31 04:30:00", "2008-12-27 13:30:00", "2008-12-28 13:30:00", "2011-12-31 13:30:00", null])"; - const char* floor_15_day = R"([ + std::string floor_15_day = R"([ "1969-12-31 14:30:00", "2000-02-29 13:30:00", "1898-12-31 15:00:00", "2033-05-15 14:30:00", "2019-12-31 13:30:00", "2019-12-30 13:30:00", "2019-12-15 13:30:00", "2009-12-30 13:30:00", "2009-12-31 13:30:00", "2009-12-31 13:30:00", "2009-12-31 13:30:00", "2005-12-31 13:30:00", "2005-12-30 13:30:00", "2008-12-15 13:30:00", "2008-12-15 13:30:00", "2011-12-31 13:30:00", null])"; - const char* floor_3_weeks = R"([ + std::string floor_3_weeks = R"([ "1969-12-28 14:30:00", "2000-02-13 13:30:00", "1899-01-01 15:00:00", "2033-05-08 14:30:00", "2019-12-29 13:30:00", "2019-12-29 13:30:00", "2019-12-29 13:30:00", "2010-01-03 13:30:00", "2010-01-03 13:30:00", "2010-01-03 13:30:00", "2010-01-03 13:30:00", "2006-01-01 13:30:00", "2006-01-01 13:30:00", "2008-12-21 13:30:00", "2008-12-28 13:30:00", "2012-01-01 13:30:00", null])"; - const char* floor_3_weeks_sunday = R"([ + std::string floor_3_weeks_sunday = R"([ "1970-01-03 14:30:00", "2000-03-04 13:30:00", "1898-12-31 15:00:00", "2033-05-07 14:30:00", "2019-12-28 13:30:00", "2019-12-28 13:30:00", "2019-12-28 13:30:00", "2010-01-02 13:30:00", "2010-01-02 13:30:00", "2010-01-02 13:30:00", "2010-01-02 13:30:00", "2005-12-31 13:30:00", "2005-12-31 13:30:00", "2009-01-03 13:30:00", "2009-01-03 13:30:00", "2011-12-31 13:30:00", null])"; +#if ARROW_USE_STD_CHRONO + floor_15_hour.replace(floor_15_hour.find("2000-02-29 13:30:00"), 19, "2000-02-29 14:30:00"); + floor_15_day.replace(floor_15_day.find("2000-02-29 13:30:00"), 19, "2000-02-29 14:30:00"); + floor_3_weeks.replace(floor_3_weeks.find("2000-02-13 13:30:00"), 19, "2000-02-13 14:30:00"); + floor_3_weeks_sunday.replace(floor_3_weeks_sunday.find("2000-03-04 13:30:00"), 19, "2000-03-04 14:30:00"); +#endif const char* floor_5_months = R"([ "1969-12-31 14:30:00", "1999-12-31 13:30:00", "1898-12-31 
15:00:00", "2032-12-31 13:30:00", "2019-12-31 13:30:00", "2019-10-31 13:30:00", "2019-10-31 13:30:00", "2009-10-31 13:30:00", @@ -3253,10 +3272,10 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, FloorZoned) { &round_to_15_milliseconds); CheckScalarUnary(op, unit, times, unit, floor_13_second, &round_to_13_seconds); CheckScalarUnary(op, unit, times, unit, floor_13_minute, &round_to_13_minutes); - CheckScalarUnary(op, unit, times, unit, floor_15_hour, &round_to_15_hours); - CheckScalarUnary(op, unit, times, unit, floor_15_day, &round_to_15_days); - CheckScalarUnary(op, unit, times, unit, floor_3_weeks, &round_to_3_weeks); - CheckScalarUnary(op, unit, times, unit, floor_3_weeks_sunday, &round_to_3_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, floor_15_hour.c_str(), &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, floor_15_day.c_str(), &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, floor_3_weeks.c_str(), &round_to_3_weeks); + CheckScalarUnary(op, unit, times, unit, floor_3_weeks_sunday.c_str(), &round_to_3_weeks_sunday); CheckScalarUnary(op, unit, times, unit, floor_5_months, &round_to_5_months); CheckScalarUnary(op, unit, times, unit, floor_3_quarters, &round_to_3_quarters); CheckScalarUnary(op, unit, times, unit, floor_15_years, &round_to_15_years); @@ -3640,26 +3659,32 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, RoundZoned) { "2020-01-01 01:09:00", "2019-12-31 02:09:00", "2019-12-30 03:09:00", "2009-12-31 04:22:00", "2010-01-01 05:22:00", "2010-01-03 06:30:00", "2010-01-04 07:30:00", "2006-01-01 08:43:00", "2005-12-31 09:43:00", "2008-12-27 23:56:00", "2008-12-28 23:56:00", "2012-01-01 00:56:00", null])"; - const char* round_15_hour = R"([ + std::string round_15_hour = R"([ "1970-01-01 05:30:00", "2000-03-01 04:30:00", "1899-01-01 06:00:00", "2033-05-18 05:30:00", "2020-01-01 04:30:00", "2019-12-31 04:30:00", "2019-12-30 04:30:00", "2009-12-31 04:30:00", "2010-01-01 04:30:00", "2010-01-03 04:30:00", "2010-01-04 
04:30:00", "2006-01-01 04:30:00", "2005-12-31 04:30:00", "2008-12-28 04:30:00", "2008-12-29 04:30:00", "2012-01-01 04:30:00", null])"; - const char* round_15_day = R"([ + std::string round_15_day = R"([ "1969-12-31 14:30:00", "2000-02-29 13:30:00", "1898-12-31 15:00:00", "2033-05-15 14:30:00", "2019-12-31 13:30:00", "2019-12-30 13:30:00", "2019-12-30 13:30:00", "2009-12-30 13:30:00", "2009-12-31 13:30:00", "2009-12-31 13:30:00", "2009-12-31 13:30:00", "2005-12-31 13:30:00", "2005-12-30 13:30:00", "2008-12-30 13:30:00", "2008-12-30 13:30:00", "2011-12-31 13:30:00", null])"; - const char* round_3_weeks = R"([ + std::string round_3_weeks = R"([ "1969-12-28 14:30:00", "2000-03-05 13:30:00", "1899-01-01 15:00:00", "2033-05-08 14:30:00", "2019-12-29 13:30:00", "2019-12-29 13:30:00", "2019-12-29 13:30:00", "2010-01-03 13:30:00", "2010-01-03 13:30:00", "2010-01-03 13:30:00", "2010-01-03 13:30:00", "2006-01-01 13:30:00", "2006-01-01 13:30:00", "2008-12-21 13:30:00", "2008-12-28 13:30:00", "2012-01-01 13:30:00",null])"; - const char* round_3_weeks_sunday = R"([ + std::string round_3_weeks_sunday = R"([ "1970-01-03 14:30:00", "2000-03-04 13:30:00", "1898-12-31 15:00:00", "2033-05-28 14:30:00", "2019-12-28 13:30:00", "2019-12-28 13:30:00", "2019-12-28 13:30:00", "2010-01-02 13:30:00", "2010-01-02 13:30:00", "2010-01-02 13:30:00", "2010-01-02 13:30:00", "2005-12-31 13:30:00", "2005-12-31 13:30:00", "2009-01-03 13:30:00", "2009-01-03 13:30:00", "2011-12-31 13:30:00", null])"; +#if ARROW_USE_STD_CHRONO + round_15_hour.replace(round_15_hour.find("2000-03-01 04:30:00"), 19, "2000-03-01 05:30:00"); + round_15_day.replace(round_15_day.find("2000-02-29 13:30:00"), 19, "2000-02-29 14:30:00"); + round_3_weeks.replace(round_3_weeks.find("2000-03-05 13:30:00"), 19, "2000-03-05 14:30:00"); + round_3_weeks_sunday.replace(round_3_weeks_sunday.find("2000-03-04 13:30:00"), 19, "2000-03-04 14:30:00"); +#endif const char* round_5_months = R"([ "1969-12-31 14:30:00", "1999-12-31 13:30:00", 
"1898-12-31 15:00:00", "2033-05-31 14:30:00", "2019-12-31 13:30:00", "2019-10-31 13:30:00", "2019-10-31 13:30:00", "2009-10-31 13:30:00", @@ -3686,10 +3711,10 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, RoundZoned) { &round_to_15_milliseconds); CheckScalarUnary(op, unit, times, unit, round_13_second, &round_to_13_seconds); CheckScalarUnary(op, unit, times, unit, round_13_minute, &round_to_13_minutes); - CheckScalarUnary(op, unit, times, unit, round_15_hour, &round_to_15_hours); - CheckScalarUnary(op, unit, times, unit, round_15_day, &round_to_15_days); - CheckScalarUnary(op, unit, times, unit, round_3_weeks, &round_to_3_weeks); - CheckScalarUnary(op, unit, times, unit, round_3_weeks_sunday, &round_to_3_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, round_15_hour.c_str(), &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, round_15_day.c_str(), &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, round_3_weeks.c_str(), &round_to_3_weeks); + CheckScalarUnary(op, unit, times, unit, round_3_weeks_sunday.c_str(), &round_to_3_weeks_sunday); CheckScalarUnary(op, unit, times, unit, round_5_months, &round_to_5_months); CheckScalarUnary(op, unit, times, unit, round_3_quarters, &round_to_3_quarters); CheckScalarUnary(op, unit, times, unit, round_15_years, &round_to_15_years); From 8c7110af2208e62b24e5ca857bfe3520032d2444 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 23 Dec 2025 17:57:13 +0100 Subject: [PATCH 03/29] keep vendored lib for gcc --- .../compute/kernels/scalar_temporal_test.cc | 80 +++++++------------ cpp/src/arrow/util/chrono_internal.h | 35 +++----- 2 files changed, 43 insertions(+), 72 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 218fb79c363..da1172212a2 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -39,6 +39,10 @@ using 
internal::StringFormatter; namespace compute { +TEST(ChronoConfig, LogChronoBackend) { + std::cout << "ARROW_USE_STD_CHRONO=" << ARROW_USE_STD_CHRONO << std::endl; +} + class ScalarTemporalTest : public ::testing::Test { public: const char* date32s = @@ -870,13 +874,7 @@ TEST_F(ScalarTemporalTest, TestZoned2) { {"iso_year": 2009, "iso_week": 1, "iso_day_of_week": 1}, {"iso_year": 2011, "iso_week": 52, "iso_day_of_week": 7}, null])"); auto quarter = "[1, 1, 1, 2, 1, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 1, null]"; - // Note: GCC behaves differently for Australia/Broken_Hill around the year 2000 zone - // rule transition. The expected hour for 2000-02-29 (index 1) differs because the - // offset is wrong (+9:30 instead of +10:30). - std::string hour = "[9, 9, 9, 13, 11, 12, 13, 14, 15, 17, 18, 19, 20, 10, 10, 11, null]"; -#if ARROW_USE_STD_CHRONO - hour.replace(hour.find("[9, 9, "), 6, "[9, 8, "); -#endif + auto hour = "[9, 9, 9, 13, 11, 12, 13, 14, 15, 17, 18, 19, 20, 10, 10, 11, null]"; auto minute = "[30, 53, 59, 3, 35, 40, 45, 50, 55, 0, 5, 10, 15, 30, 30, 32, null]"; CheckScalarUnary("year", unit, times_seconds_precision, int64(), year); @@ -897,7 +895,7 @@ TEST_F(ScalarTemporalTest, TestZoned2) { CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times_seconds_precision), iso_calendar); CheckScalarUnary("quarter", unit, times_seconds_precision, int64(), quarter); - CheckScalarUnary("hour", unit, times_seconds_precision, int64(), hour.c_str()); + CheckScalarUnary("hour", unit, times_seconds_precision, int64(), hour); CheckScalarUnary("minute", unit, times_seconds_precision, int64(), minute); CheckScalarUnary("second", unit, times_seconds_precision, int64(), second); CheckScalarUnary("millisecond", unit, times_seconds_precision, int64(), zeros); @@ -2824,32 +2822,26 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, CeilZoned) { "2020-01-01 01:09:00", "2019-12-31 02:22:00", "2019-12-30 03:22:00", "2009-12-31 04:22:00", "2010-01-01 05:35:00", "2010-01-03 06:43:00", 
"2010-01-04 07:43:00", "2006-01-01 08:43:00", "2005-12-31 09:56:00", "2008-12-28 00:09:00", "2008-12-29 00:09:00", "2012-01-01 01:09:00", null])"; - std::string ceil_15_hour = R"([ + const char* ceil_15_hour = R"([ "1970-01-01 05:30:00", "2000-03-01 04:30:00", "1899-01-01 06:00:00", "2033-05-18 05:30:00", "2020-01-01 04:30:00", "2019-12-31 04:30:00", "2019-12-30 04:30:00", "2009-12-31 04:30:00", "2010-01-01 19:30:00", "2010-01-03 19:30:00", "2010-01-04 19:30:00", "2006-01-01 19:30:00", "2005-12-31 19:30:00", "2008-12-28 04:30:00", "2008-12-29 04:30:00", "2012-01-01 04:30:00", null])"; - std::string ceil_15_day = R"([ + const char* ceil_15_day = R"([ "1970-01-15 14:30:00", "2000-03-15 13:30:00", "1899-01-15 15:00:00", "2033-05-30 14:30:00", "2020-01-15 13:30:00", "2020-01-14 13:30:00", "2019-12-30 13:30:00", "2010-01-14 13:30:00", "2010-01-15 13:30:00", "2010-01-15 13:30:00", "2010-01-15 13:30:00", "2006-01-15 13:30:00", "2006-01-14 13:30:00", "2008-12-30 13:30:00", "2008-12-30 13:30:00", "2012-01-15 13:30:00", null])"; - std::string ceil_3_weeks = R"([ + const char* ceil_3_weeks = R"([ "1970-01-18 14:30:00", "2000-03-05 13:30:00", "1899-01-22 15:00:00", "2033-05-29 14:30:00", "2020-01-19 13:30:00", "2020-01-19 13:30:00", "2020-01-19 13:30:00", "2010-01-24 13:30:00", "2010-01-24 13:30:00", "2010-01-24 13:30:00", "2010-01-24 13:30:00", "2006-01-22 13:30:00", "2006-01-22 13:30:00", "2009-01-11 13:30:00", "2009-01-18 13:30:00", "2012-01-22 13:30:00", null])"; - std::string ceil_3_weeks_sunday = R"([ + const char* ceil_3_weeks_sunday = R"([ "1970-01-24 14:30:00", "2000-03-25 13:30:00", "1899-01-21 15:00:00", "2033-05-28 14:30:00", "2020-01-18 13:30:00", "2020-01-18 13:30:00", "2020-01-18 13:30:00", "2010-01-23 13:30:00", "2010-01-23 13:30:00", "2010-01-23 13:30:00", "2010-01-23 13:30:00", "2006-01-21 13:30:00", "2006-01-21 13:30:00", "2009-01-24 13:30:00", "2009-01-24 13:30:00", "2012-01-21 13:30:00", null])"; -#if ARROW_USE_STD_CHRONO - 
ceil_15_hour.replace(ceil_15_hour.find("2000-03-01 04:30:00"), 19, "2000-03-01 05:30:00"); - ceil_15_day.replace(ceil_15_day.find("2000-03-15 13:30:00"), 19, "2000-03-15 14:30:00"); - ceil_3_weeks.replace(ceil_3_weeks.find("2000-03-05 13:30:00"), 19, "2000-03-05 14:30:00"); - ceil_3_weeks_sunday.replace(ceil_3_weeks_sunday.find("2000-03-25 13:30:00"), 19, "2000-03-25 14:30:00"); -#endif const char* ceil_5_months = R"([ "1970-05-31 14:30:00", "2000-05-31 14:30:00", "1899-05-31 14:30:00", "2033-05-31 14:30:00", "2020-05-31 14:30:00", "2020-03-31 13:30:00", "2020-03-31 13:30:00", "2010-03-31 13:30:00", @@ -2874,10 +2866,10 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, CeilZoned) { CheckScalarUnary(op, unit, times, unit, ceil_15_millisecond, &round_to_15_milliseconds); CheckScalarUnary(op, unit, times, unit, ceil_13_second, &round_to_13_seconds); CheckScalarUnary(op, unit, times, unit, ceil_13_minute, &round_to_13_minutes); - CheckScalarUnary(op, unit, times, unit, ceil_15_hour.c_str(), &round_to_15_hours); - CheckScalarUnary(op, unit, times, unit, ceil_15_day.c_str(), &round_to_15_days); - CheckScalarUnary(op, unit, times, unit, ceil_3_weeks.c_str(), &round_to_3_weeks); - CheckScalarUnary(op, unit, times, unit, ceil_3_weeks_sunday.c_str(), &round_to_3_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, ceil_15_hour, &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, ceil_15_day, &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, ceil_3_weeks, &round_to_3_weeks); + CheckScalarUnary(op, unit, times, unit, ceil_3_weeks_sunday, &round_to_3_weeks_sunday); CheckScalarUnary(op, unit, times, unit, ceil_5_months, &round_to_5_months); CheckScalarUnary(op, unit, times, unit, ceil_3_quarters, &round_to_3_quarters); CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years); @@ -3220,32 +3212,26 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, FloorZoned) { "2020-01-01 00:56:00", "2019-12-31 02:09:00", "2019-12-30 03:09:00", 
"2009-12-31 04:09:00", "2010-01-01 05:22:00", "2010-01-03 06:30:00", "2010-01-04 07:30:00", "2006-01-01 08:30:00", "2005-12-31 09:43:00", "2008-12-27 23:56:00", "2008-12-28 23:56:00", "2012-01-01 00:56:00", null])"; - std::string floor_15_hour = R"([ + const char* floor_15_hour = R"([ "1969-12-31 14:30:00", "2000-02-29 13:30:00", "1898-12-31 15:00:00", "2033-05-17 14:30:00", "2019-12-31 13:30:00", "2019-12-30 13:30:00", "2019-12-29 13:30:00", "2009-12-30 13:30:00", "2010-01-01 04:30:00", "2010-01-03 04:30:00", "2010-01-04 04:30:00", "2006-01-01 04:30:00", "2005-12-31 04:30:00", "2008-12-27 13:30:00", "2008-12-28 13:30:00", "2011-12-31 13:30:00", null])"; - std::string floor_15_day = R"([ + const char* floor_15_day = R"([ "1969-12-31 14:30:00", "2000-02-29 13:30:00", "1898-12-31 15:00:00", "2033-05-15 14:30:00", "2019-12-31 13:30:00", "2019-12-30 13:30:00", "2019-12-15 13:30:00", "2009-12-30 13:30:00", "2009-12-31 13:30:00", "2009-12-31 13:30:00", "2009-12-31 13:30:00", "2005-12-31 13:30:00", "2005-12-30 13:30:00", "2008-12-15 13:30:00", "2008-12-15 13:30:00", "2011-12-31 13:30:00", null])"; - std::string floor_3_weeks = R"([ + const char* floor_3_weeks = R"([ "1969-12-28 14:30:00", "2000-02-13 13:30:00", "1899-01-01 15:00:00", "2033-05-08 14:30:00", "2019-12-29 13:30:00", "2019-12-29 13:30:00", "2019-12-29 13:30:00", "2010-01-03 13:30:00", "2010-01-03 13:30:00", "2010-01-03 13:30:00", "2010-01-03 13:30:00", "2006-01-01 13:30:00", "2006-01-01 13:30:00", "2008-12-21 13:30:00", "2008-12-28 13:30:00", "2012-01-01 13:30:00", null])"; - std::string floor_3_weeks_sunday = R"([ + const char* floor_3_weeks_sunday = R"([ "1970-01-03 14:30:00", "2000-03-04 13:30:00", "1898-12-31 15:00:00", "2033-05-07 14:30:00", "2019-12-28 13:30:00", "2019-12-28 13:30:00", "2019-12-28 13:30:00", "2010-01-02 13:30:00", "2010-01-02 13:30:00", "2010-01-02 13:30:00", "2010-01-02 13:30:00", "2005-12-31 13:30:00", "2005-12-31 13:30:00", "2009-01-03 13:30:00", "2009-01-03 13:30:00", "2011-12-31 
13:30:00", null])"; -#if ARROW_USE_STD_CHRONO - floor_15_hour.replace(floor_15_hour.find("2000-02-29 13:30:00"), 19, "2000-02-29 14:30:00"); - floor_15_day.replace(floor_15_day.find("2000-02-29 13:30:00"), 19, "2000-02-29 14:30:00"); - floor_3_weeks.replace(floor_3_weeks.find("2000-02-13 13:30:00"), 19, "2000-02-13 14:30:00"); - floor_3_weeks_sunday.replace(floor_3_weeks_sunday.find("2000-03-04 13:30:00"), 19, "2000-03-04 14:30:00"); -#endif const char* floor_5_months = R"([ "1969-12-31 14:30:00", "1999-12-31 13:30:00", "1898-12-31 15:00:00", "2032-12-31 13:30:00", "2019-12-31 13:30:00", "2019-10-31 13:30:00", "2019-10-31 13:30:00", "2009-10-31 13:30:00", @@ -3272,10 +3258,10 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, FloorZoned) { &round_to_15_milliseconds); CheckScalarUnary(op, unit, times, unit, floor_13_second, &round_to_13_seconds); CheckScalarUnary(op, unit, times, unit, floor_13_minute, &round_to_13_minutes); - CheckScalarUnary(op, unit, times, unit, floor_15_hour.c_str(), &round_to_15_hours); - CheckScalarUnary(op, unit, times, unit, floor_15_day.c_str(), &round_to_15_days); - CheckScalarUnary(op, unit, times, unit, floor_3_weeks.c_str(), &round_to_3_weeks); - CheckScalarUnary(op, unit, times, unit, floor_3_weeks_sunday.c_str(), &round_to_3_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, floor_15_hour, &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, floor_15_day, &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, floor_3_weeks, &round_to_3_weeks); + CheckScalarUnary(op, unit, times, unit, floor_3_weeks_sunday, &round_to_3_weeks_sunday); CheckScalarUnary(op, unit, times, unit, floor_5_months, &round_to_5_months); CheckScalarUnary(op, unit, times, unit, floor_3_quarters, &round_to_3_quarters); CheckScalarUnary(op, unit, times, unit, floor_15_years, &round_to_15_years); @@ -3659,32 +3645,26 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, RoundZoned) { "2020-01-01 01:09:00", "2019-12-31 02:09:00", "2019-12-30 
03:09:00", "2009-12-31 04:22:00", "2010-01-01 05:22:00", "2010-01-03 06:30:00", "2010-01-04 07:30:00", "2006-01-01 08:43:00", "2005-12-31 09:43:00", "2008-12-27 23:56:00", "2008-12-28 23:56:00", "2012-01-01 00:56:00", null])"; - std::string round_15_hour = R"([ + const char* round_15_hour = R"([ "1970-01-01 05:30:00", "2000-03-01 04:30:00", "1899-01-01 06:00:00", "2033-05-18 05:30:00", "2020-01-01 04:30:00", "2019-12-31 04:30:00", "2019-12-30 04:30:00", "2009-12-31 04:30:00", "2010-01-01 04:30:00", "2010-01-03 04:30:00", "2010-01-04 04:30:00", "2006-01-01 04:30:00", "2005-12-31 04:30:00", "2008-12-28 04:30:00", "2008-12-29 04:30:00", "2012-01-01 04:30:00", null])"; - std::string round_15_day = R"([ + const char* round_15_day = R"([ "1969-12-31 14:30:00", "2000-02-29 13:30:00", "1898-12-31 15:00:00", "2033-05-15 14:30:00", "2019-12-31 13:30:00", "2019-12-30 13:30:00", "2019-12-30 13:30:00", "2009-12-30 13:30:00", "2009-12-31 13:30:00", "2009-12-31 13:30:00", "2009-12-31 13:30:00", "2005-12-31 13:30:00", "2005-12-30 13:30:00", "2008-12-30 13:30:00", "2008-12-30 13:30:00", "2011-12-31 13:30:00", null])"; - std::string round_3_weeks = R"([ + const char* round_3_weeks = R"([ "1969-12-28 14:30:00", "2000-03-05 13:30:00", "1899-01-01 15:00:00", "2033-05-08 14:30:00", "2019-12-29 13:30:00", "2019-12-29 13:30:00", "2019-12-29 13:30:00", "2010-01-03 13:30:00", "2010-01-03 13:30:00", "2010-01-03 13:30:00", "2010-01-03 13:30:00", "2006-01-01 13:30:00", "2006-01-01 13:30:00", "2008-12-21 13:30:00", "2008-12-28 13:30:00", "2012-01-01 13:30:00",null])"; - std::string round_3_weeks_sunday = R"([ + const char* round_3_weeks_sunday = R"([ "1970-01-03 14:30:00", "2000-03-04 13:30:00", "1898-12-31 15:00:00", "2033-05-28 14:30:00", "2019-12-28 13:30:00", "2019-12-28 13:30:00", "2019-12-28 13:30:00", "2010-01-02 13:30:00", "2010-01-02 13:30:00", "2010-01-02 13:30:00", "2010-01-02 13:30:00", "2005-12-31 13:30:00", "2005-12-31 13:30:00", "2009-01-03 13:30:00", "2009-01-03 13:30:00", 
"2011-12-31 13:30:00", null])"; -#if ARROW_USE_STD_CHRONO - round_15_hour.replace(round_15_hour.find("2000-03-01 04:30:00"), 19, "2000-03-01 05:30:00"); - round_15_day.replace(round_15_day.find("2000-02-29 13:30:00"), 19, "2000-02-29 14:30:00"); - round_3_weeks.replace(round_3_weeks.find("2000-03-05 13:30:00"), 19, "2000-03-05 14:30:00"); - round_3_weeks_sunday.replace(round_3_weeks_sunday.find("2000-03-04 13:30:00"), 19, "2000-03-04 14:30:00"); -#endif const char* round_5_months = R"([ "1969-12-31 14:30:00", "1999-12-31 13:30:00", "1898-12-31 15:00:00", "2033-05-31 14:30:00", "2019-12-31 13:30:00", "2019-10-31 13:30:00", "2019-10-31 13:30:00", "2009-10-31 13:30:00", @@ -3711,10 +3691,10 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, RoundZoned) { &round_to_15_milliseconds); CheckScalarUnary(op, unit, times, unit, round_13_second, &round_to_13_seconds); CheckScalarUnary(op, unit, times, unit, round_13_minute, &round_to_13_minutes); - CheckScalarUnary(op, unit, times, unit, round_15_hour.c_str(), &round_to_15_hours); - CheckScalarUnary(op, unit, times, unit, round_15_day.c_str(), &round_to_15_days); - CheckScalarUnary(op, unit, times, unit, round_3_weeks.c_str(), &round_to_3_weeks); - CheckScalarUnary(op, unit, times, unit, round_3_weeks_sunday.c_str(), &round_to_3_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, round_15_hour, &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, round_15_day, &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, round_3_weeks, &round_to_3_weeks); + CheckScalarUnary(op, unit, times, unit, round_3_weeks_sunday, &round_to_3_weeks_sunday); CheckScalarUnary(op, unit, times, unit, round_5_months, &round_to_5_months); CheckScalarUnary(op, unit, times, unit, round_3_quarters, &round_to_3_quarters); CheckScalarUnary(op, unit, times, unit, round_15_years, &round_to_15_years); diff --git a/cpp/src/arrow/util/chrono_internal.h b/cpp/src/arrow/util/chrono_internal.h index 2eef96f8adb..67c5818b210 100644 --- 
a/cpp/src/arrow/util/chrono_internal.h +++ b/cpp/src/arrow/util/chrono_internal.h @@ -21,9 +21,9 @@ /// \brief Abstraction layer for C++20 chrono calendar/timezone APIs /// /// This header provides a unified interface for chrono calendar and timezone -/// functionality. On compilers with full C++20 chrono support (MSVC 16.10+ and -/// GCC 14+), it uses std::chrono. On other compilers, it falls back to the -/// vendored Howard Hinnant date library. +/// functionality. On compilers with full C++20 chrono support, it uses +/// std::chrono. On other compilers, it falls back to the vendored Howard Hinnant +/// date library. /// /// The main benefit is on Windows where std::chrono uses the system timezone /// database, eliminating the need for users to install IANA tzdata separately. @@ -37,26 +37,17 @@ // Feature detection for C++20 chrono timezone support // We only enable for compilers with FULL support (not partial) +// https://en.cppreference.com/w/cpp/compiler_support/20.html#cpp_lib_chrono_201907L // -// Compiler support -// (https://en.cppreference.com/w/cpp/compiler_support/20.html#cpp_lib_chrono_201907L): -// - MSVC 19.29 (VS 2019 16.10)+: Full support, uses Windows TZ database -// - GCC 14+: Full support, requires tzdata.zi on system -// - GCC 11-13: Partial support only -// - Clang/libc++: Still partial even in version 19 -// - Apple Clang: Still partial - -#if defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907L -# if defined(_MSC_VER) -// MSVC 19.29+: Full support, uses Windows internal TZ database -# define ARROW_USE_STD_CHRONO 1 -# elif defined(__GLIBCXX__) && __GNUC__ >= 14 -// GCC 14+ with libstdc++: Full support, requires tzdata.zi -# define ARROW_USE_STD_CHRONO 1 -# endif -#endif - -#ifndef ARROW_USE_STD_CHRONO +// MSVC 19.29+ (VS16.10+): Full C++20 chrono support, uses Windows internal TZ database. 
+// GCC libstdc++ has a bug where DST state is incorrectly reset when a timezone +// transitions between rule sets in tzdata.zi (e.g., Australia/Broken_Hill around +// 2000-02-29 23:23:24). +// Until this is fixed, we use the vendored date.h library for GCC. + +#if defined(_MSC_VER) && defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907L +# define ARROW_USE_STD_CHRONO 1 +#else # define ARROW_USE_STD_CHRONO 0 #endif From 38a6a9b0b80b7fd7b19a991cb1bc47749765a2fc Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 23 Dec 2025 18:59:58 +0100 Subject: [PATCH 04/29] simplify with C++20 chrono features --- .../compute/kernels/scalar_temporal_test.cc | 2 +- cpp/src/arrow/util/chrono_internal.h | 147 +++--------------- 2 files changed, 21 insertions(+), 128 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index da1172212a2..ebaa7aecdc4 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -20,7 +20,6 @@ #include #include "arrow/compute/api_scalar.h" -#include "arrow/util/chrono_internal.h" // for ARROW_USE_STD_CHRONO #include "arrow/compute/cast.h" #include "arrow/compute/kernels/test_util_internal.h" #include "arrow/testing/gtest_util.h" @@ -30,6 +29,7 @@ #include "arrow/type_fwd.h" #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/chrono_internal.h" // for ARROW_USE_STD_CHRONO #include "arrow/util/formatting.h" #include "arrow/util/logging_internal.h" diff --git a/cpp/src/arrow/util/chrono_internal.h b/cpp/src/arrow/util/chrono_internal.h index 67c5818b210..453d952d1dc 100644 --- a/cpp/src/arrow/util/chrono_internal.h +++ b/cpp/src/arrow/util/chrono_internal.h @@ -29,9 +29,6 @@ /// database, eliminating the need for users to install IANA tzdata separately. 
#include -#include -#include -#include #include #include @@ -53,6 +50,9 @@ #if ARROW_USE_STD_CHRONO // Use C++20 standard library chrono +# include +# include +# include #else // Use vendored Howard Hinnant date library # include "arrow/vendored/datetime.h" @@ -111,26 +111,24 @@ using nonexistent_local_time = std::chrono::nonexistent_local_time; using ambiguous_local_time = std::chrono::ambiguous_local_time; // Weekday constants -inline constexpr std::chrono::weekday Monday{1}; -inline constexpr std::chrono::weekday Sunday{0}; +using std::chrono::Monday; +using std::chrono::Sunday; // Rounding functions using std::chrono::ceil; using std::chrono::floor; using std::chrono::round; -// trunc is not in std::chrono - implement proper truncation toward zero -// floor rounds toward negative infinity, but trunc rounds toward zero +// trunc (truncation toward zero) is not in std::chrono, only floor/ceil/round template constexpr ToDuration trunc(const std::chrono::duration& d) { - auto floor_result = std::chrono::floor(d); - auto remainder = d - floor_result; - // If original was negative and there's a non-zero remainder, - // floor went too far negative, so add one unit back - if (d.count() < 0 && remainder.count() != 0) { - return floor_result + ToDuration{1}; + auto floored = std::chrono::floor(d); + // floor rounds toward -infinity; for negative values with remainder, add 1 to get + // toward zero + if (d.count() < 0 && (d - floored).count() != 0) { + return floored + ToDuration{1}; } - return floor_result; + return floored; } // Timezone lookup @@ -140,127 +138,22 @@ inline const time_zone* locate_zone(std::string_view tz_name) { inline const time_zone* current_zone() { return std::chrono::current_zone(); } -// Helper to get subsecond decimal places based on duration period -template -constexpr int get_subsecond_decimals() { - using Period = typename Duration::period; - if constexpr (Period::den == 1000) - return 3; // milliseconds - else if constexpr (Period::den == 
1000000) - return 6; // microseconds - else if constexpr (Period::den == 1000000000) - return 9; // nanoseconds - else - return 0; // seconds or coarser -} - -// Formatting support with subsecond precision and timezone handling -// Mimics the vendored date library's to_stream behavior for compatibility +// Formatting support - streams directly using C++20 std::vformat_to +// Provides: direct streaming, stream state preservation, chaining, rich format specifiers template std::basic_ostream& to_stream( std::basic_ostream& os, const CharT* fmt, const std::chrono::zoned_time& zt) { - // Get local time and timezone info - auto lt = zt.get_local_time(); - auto info = zt.get_info(); - - auto lt_days = std::chrono::floor(lt); - auto ymd = year_month_day{lt_days}; - - // Calculate time of day components - auto time_since_midnight = lt - local_time{lt_days}; - auto total_secs = std::chrono::duration_cast(time_since_midnight); - auto h = std::chrono::duration_cast(time_since_midnight); - auto m = std::chrono::duration_cast(time_since_midnight - h); - auto s = std::chrono::duration_cast(time_since_midnight - h - m); - - // Build std::tm for strftime - std::tm tm{}; - tm.tm_sec = static_cast(s.count()); - tm.tm_min = static_cast(m.count()); - tm.tm_hour = static_cast(h.count()); - tm.tm_mday = static_cast(static_cast(ymd.day())); - tm.tm_mon = static_cast(static_cast(ymd.month())) - 1; - tm.tm_year = static_cast(ymd.year()) - 1900; - - auto wd = weekday{lt_days}; - tm.tm_wday = static_cast(wd.c_encoding()); - - auto year_start = - std::chrono::local_days{ymd.year() / std::chrono::January / std::chrono::day{1}}; - tm.tm_yday = static_cast((lt_days - year_start).count()); - tm.tm_isdst = info.save != std::chrono::minutes{0} ? 1 : 0; - - // Timezone offset calculation - auto offset_mins = std::chrono::duration_cast(info.offset); - bool neg_offset = offset_mins.count() < 0; - auto abs_offset = neg_offset ? 
-offset_mins : offset_mins; - auto off_h = std::chrono::duration_cast(abs_offset); - auto off_m = abs_offset - off_h; - - // Calculate subsecond value - constexpr int decimals = get_subsecond_decimals(); - int64_t subsec_value = 0; - if constexpr (decimals > 0) { - auto subsec_duration = time_since_midnight - total_secs; - subsec_value = std::chrono::duration_cast(subsec_duration).count(); - if (subsec_value < 0) subsec_value = -subsec_value; - } - - // Parse format string, handle %S, %z, %Z specially - std::string result; - for (const CharT* p = fmt; *p; ++p) { - if (*p == '%' && *(p + 1)) { - CharT spec = *(p + 1); - if (spec == 'S') { - // %S with subsecond precision - result += (tm.tm_sec < 10 ? "0" : "") + std::to_string(tm.tm_sec); - if constexpr (decimals > 0) { - std::ostringstream ss; - ss << '.' << std::setfill('0') << std::setw(decimals) << subsec_value; - result += ss.str(); - } - ++p; - } else if (spec == 'z') { - // %z timezone offset - std::ostringstream ss; - ss << (neg_offset ? 
'-' : '+') << std::setfill('0') << std::setw(2) - << off_h.count() << std::setfill('0') << std::setw(2) << off_m.count(); - result += ss.str(); - ++p; - } else if (spec == 'Z') { - // %Z timezone abbreviation - result += info.abbrev; - ++p; - } else { - // Use strftime for other specifiers - char buf[64]; - char small_fmt[3] = {'%', static_cast(spec), '\0'}; - if (std::strftime(buf, sizeof(buf), small_fmt, &tm) > 0) { - result += buf; - } - ++p; - } - } else { - result += static_cast(*p); - } - } - - return os << result; + std::vformat_to(std::ostreambuf_iterator(os), std::string("{:") + fmt + "}", + std::make_format_args(zt)); + return os; } +// Format a duration using strftime-like format specifiers +// Converts "%H%M" style to C++20's "{:%H%M}" style and uses std::vformat template std::string format(const char* fmt, const Duration& d) { - std::ostringstream ss; - auto total_minutes = std::chrono::duration_cast(d).count(); - bool negative = total_minutes < 0; - if (negative) total_minutes = -total_minutes; - auto hours = total_minutes / 60; - auto mins = total_minutes % 60; - ss << (negative ? 
"-" : "+"); - ss << std::setfill('0') << std::setw(2) << hours; - ss << std::setfill('0') << std::setw(2) << mins; - return ss.str(); + return std::vformat(std::string("{:") + fmt + "}", std::make_format_args(d)); } // Literals namespace From deee2137ed97cb81a1be070dde78055bfb76f577 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 5 Jan 2026 18:03:09 +0100 Subject: [PATCH 05/29] Review feedback --- .../compute/kernels/scalar_temporal_binary.cc | 14 +++++++------- .../arrow/compute/kernels/scalar_temporal_test.cc | 4 ---- .../arrow/compute/kernels/scalar_temporal_unary.cc | 14 +++++++------- cpp/src/arrow/util/chrono_internal.h | 9 --------- 4 files changed, 14 insertions(+), 27 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc index 920d1ec0105..a53348171f7 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc @@ -53,13 +53,13 @@ using chrono::weeks; using chrono::year_month_day; using chrono::year_month_weekday; using chrono::years; -using chrono::literals::dec; -using chrono::literals::jan; -using chrono::literals::last; -using chrono::literals::mon; -using chrono::literals::sun; -using chrono::literals::thu; -using chrono::literals::wed; +using chrono::dec; +using chrono::jan; +using chrono::last; +using chrono::mon; +using chrono::sun; +using chrono::thu; +using chrono::wed; using internal::applicator::ScalarBinaryNotNullStatefulEqualTypes; using DayOfWeekState = OptionsWrapper; diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index ebaa7aecdc4..cdc1141fce8 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -39,10 +39,6 @@ using internal::StringFormatter; namespace compute { -TEST(ChronoConfig, LogChronoBackend) { - std::cout << 
"ARROW_USE_STD_CHRONO=" << ARROW_USE_STD_CHRONO << std::endl; -} - class ScalarTemporalTest : public ::testing::Test { public: const char* date32s = diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index 8df00b6b04e..4499b5bfbba 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -60,13 +60,13 @@ using chrono::year; using chrono::year_month_day; using chrono::year_month_weekday; using chrono::years; -using chrono::literals::dec; -using chrono::literals::jan; -using chrono::literals::last; -using chrono::literals::mon; -using chrono::literals::sun; -using chrono::literals::thu; -using chrono::literals::wed; +using chrono::dec; +using chrono::jan; +using chrono::last; +using chrono::mon; +using chrono::sun; +using chrono::thu; +using chrono::wed; using std::chrono::duration_cast; using std::chrono::hours; using std::chrono::minutes; diff --git a/cpp/src/arrow/util/chrono_internal.h b/cpp/src/arrow/util/chrono_internal.h index 453d952d1dc..56bfeb79109 100644 --- a/cpp/src/arrow/util/chrono_internal.h +++ b/cpp/src/arrow/util/chrono_internal.h @@ -156,21 +156,15 @@ std::string format(const char* fmt, const Duration& d) { return std::vformat(std::string("{:") + fmt + "}", std::make_format_args(d)); } -// Literals namespace -namespace literals { -// Month literals inline constexpr std::chrono::month jan = std::chrono::January; inline constexpr std::chrono::month dec = std::chrono::December; -// Weekday literals inline constexpr std::chrono::weekday sun = std::chrono::Sunday; inline constexpr std::chrono::weekday mon = std::chrono::Monday; inline constexpr std::chrono::weekday wed = std::chrono::Wednesday; inline constexpr std::chrono::weekday thu = std::chrono::Thursday; -// last specifier inline constexpr std::chrono::last_spec last = std::chrono::last; -} // namespace literals #else // !ARROW_USE_STD_CHRONO @@ 
-251,8 +245,6 @@ std::basic_ostream& to_stream( return vendored::to_stream(os, fmt, zt); } -// Literals namespace -namespace literals { inline constexpr vendored::month jan = vendored::jan; inline constexpr vendored::month dec = vendored::dec; @@ -262,7 +254,6 @@ inline constexpr vendored::weekday wed = vendored::wed; inline constexpr vendored::weekday thu = vendored::thu; inline constexpr vendored::last_spec last = vendored::last; -} // namespace literals #endif // ARROW_USE_STD_CHRONO From fc5f7ecf3d5eb9fd4fd2c2a89d43c4430645bc5a Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 5 Jan 2026 18:11:01 +0100 Subject: [PATCH 06/29] lint --- .../compute/kernels/scalar_temporal_binary.cc | 14 +++++++------- .../arrow/compute/kernels/scalar_temporal_unary.cc | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc index a53348171f7..6d975d74e21 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc @@ -41,25 +41,25 @@ namespace chrono = arrow::internal::chrono; namespace { using chrono::days; +using chrono::dec; using chrono::floor; using chrono::hh_mm_ss; +using chrono::jan; +using chrono::last; using chrono::local_days; using chrono::local_time; +using chrono::mon; +using chrono::sun; using chrono::sys_days; using chrono::sys_time; +using chrono::thu; using chrono::trunc; +using chrono::wed; using chrono::weekday; using chrono::weeks; using chrono::year_month_day; using chrono::year_month_weekday; using chrono::years; -using chrono::dec; -using chrono::jan; -using chrono::last; -using chrono::mon; -using chrono::sun; -using chrono::thu; -using chrono::wed; using internal::applicator::ScalarBinaryNotNullStatefulEqualTypes; using DayOfWeekState = OptionsWrapper; diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc 
b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index 4499b5bfbba..1bad2d0a118 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -43,30 +43,30 @@ namespace { using chrono::ceil; using chrono::days; +using chrono::dec; using chrono::floor; using chrono::hh_mm_ss; +using chrono::jan; +using chrono::last; using chrono::local_days; using chrono::local_time; using chrono::locate_zone; +using chrono::mon; using chrono::Monday; using chrono::months; using chrono::round; +using chrono::sun; using chrono::Sunday; using chrono::sys_time; +using chrono::thu; using chrono::trunc; +using chrono::wed; using chrono::weekday; using chrono::weeks; using chrono::year; using chrono::year_month_day; using chrono::year_month_weekday; using chrono::years; -using chrono::dec; -using chrono::jan; -using chrono::last; -using chrono::mon; -using chrono::sun; -using chrono::thu; -using chrono::wed; using std::chrono::duration_cast; using std::chrono::hours; using std::chrono::minutes; From 84145a7b6b60e260d669556f3e1306b2bd5ecf41 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 5 Jan 2026 18:23:14 +0100 Subject: [PATCH 07/29] Reference to gcc issue --- cpp/src/arrow/util/chrono_internal.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/util/chrono_internal.h b/cpp/src/arrow/util/chrono_internal.h index 56bfeb79109..986beb3f3a8 100644 --- a/cpp/src/arrow/util/chrono_internal.h +++ b/cpp/src/arrow/util/chrono_internal.h @@ -41,6 +41,7 @@ // transitions between rule sets in tzdata.zi (e.g., Australia/Broken_Hill around // 2000-02-29 23:23:24). // Until this is fixed, we use the vendored date.h library for GCC. 
+// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 #if defined(_MSC_VER) && defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907L # define ARROW_USE_STD_CHRONO 1 From 2751009bdad65a3e657c7e98bffba25237085391 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 5 Jan 2026 18:48:17 +0100 Subject: [PATCH 08/29] Remove windows tz workarounds --- .github/workflows/cpp.yml | 6 +-- .github/workflows/cpp_extra.yml | 4 ++ .github/workflows/cpp_windows.yml | 3 -- .github/workflows/matlab.yml | 3 -- .github/workflows/verify_rc.yml | 3 -- c_glib/test/test-assume-timezone-options.rb | 1 - c_glib/test/test-day-of-week-options.rb | 2 - c_glib/test/test-strftime-options.rb | 1 - ci/scripts/download_tz_database.sh | 3 ++ cpp/src/arrow/compute/function_test.cc | 2 - .../arrow/compute/kernels/scalar_cast_test.cc | 10 +---- .../compute/kernels/scalar_temporal_test.cc | 8 ---- cpp/src/arrow/config.cc | 27 ------------ cpp/src/arrow/config.h | 16 ------- cpp/src/arrow/public_api_test.cc | 42 ------------------- cpp/src/arrow/testing/util.cc | 19 --------- cpp/src/arrow/testing/util.h | 7 ---- dev/tasks/vcpkg-tests/github.windows.yml | 3 -- dev/tasks/verify-rc/github.win.yml | 4 -- docs/source/cpp/build_system.rst | 23 ---------- docs/source/developers/cpp/windows.rst | 9 ---- docs/source/python/install.rst | 33 --------------- python/pyarrow/config.pxi | 18 -------- python/pyarrow/conftest.py | 5 +-- python/pyarrow/includes/libarrow.pxd | 5 --- python/pyarrow/tests/conftest.py | 24 ----------- python/pyarrow/tests/strategies.py | 2 +- python/pyarrow/tests/test_compute.py | 6 +-- python/pyarrow/tests/test_misc.py | 12 ------ python/pyarrow/tests/test_util.py | 22 +--------- python/pyarrow/tests/util.py | 15 ------- python/pyarrow/util.py | 32 -------------- r/R/arrow-package.R | 17 -------- r/R/arrowExports.R | 4 -- r/src/arrowExports.cpp | 14 +------ r/src/config.cpp | 14 ------- 36 files changed, 19 insertions(+), 400 deletions(-) diff --git a/.github/workflows/cpp.yml 
b/.github/workflows/cpp.yml index 45a9c3ba774..6612308afc3 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -348,6 +348,9 @@ jobs: with: fetch-depth: 0 submodules: recursive + - name: Download Timezone Database + shell: bash + run: ci/scripts/download_tz_database.sh - uses: msys2/setup-msys2@v2 with: msystem: ${{ matrix.msystem_upper }} @@ -366,9 +369,6 @@ jobs: run: | export CMAKE_BUILD_PARALLEL_LEVEL=$NUMBER_OF_PROCESSORS ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)/build" - - name: Download Timezone Database - shell: bash - run: ci/scripts/download_tz_database.sh - name: Download MinIO shell: msys2 {0} run: | diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index f353fe86340..31e3ccbc332 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -377,6 +377,10 @@ jobs: - name: Download Timezone Database shell: bash run: ci/scripts/download_tz_database.sh + - name: Install cmake + shell: bash + run: | + ci/scripts/install_cmake.sh 4.1.2 /usr - name: Install ccache shell: bash run: | diff --git a/.github/workflows/cpp_windows.yml b/.github/workflows/cpp_windows.yml index 69bbfee28b9..3f13097a6eb 100644 --- a/.github/workflows/cpp_windows.yml +++ b/.github/workflows/cpp_windows.yml @@ -84,9 +84,6 @@ jobs: with: fetch-depth: 0 submodules: recursive - - name: Download Timezone Database - shell: bash - run: ci/scripts/download_tz_database.sh - name: Install msys2 (for tzdata for ORC tests) uses: msys2/setup-msys2@v2 id: setup-msys2 diff --git a/.github/workflows/matlab.yml b/.github/workflows/matlab.yml index 8485e62b6f5..b3f538d0cac 100644 --- a/.github/workflows/matlab.yml +++ b/.github/workflows/matlab.yml @@ -147,9 +147,6 @@ jobs: uses: matlab-actions/setup-matlab@v2 with: release: R2025b - - name: Download Timezone Database - shell: bash - run: ci/scripts/download_tz_database.sh - name: Install ccache shell: bash run: ci/scripts/install_ccache.sh 4.6.3 /usr diff --git 
a/.github/workflows/verify_rc.yml b/.github/workflows/verify_rc.yml index e88b8ca8f23..04507cb3972 100644 --- a/.github/workflows/verify_rc.yml +++ b/.github/workflows/verify_rc.yml @@ -228,9 +228,6 @@ jobs: run: | choco install --no-progress --yes boost-msvc-14.1 choco install --no-progress --yes wget - - name: Download Timezone Database - shell: bash - run: ci/scripts/download_tz_database.sh - name: Run verification env: GH_TOKEN: ${{ github.token }} diff --git a/c_glib/test/test-assume-timezone-options.rb b/c_glib/test/test-assume-timezone-options.rb index 10bf4261d33..d60935964d7 100644 --- a/c_glib/test/test-assume-timezone-options.rb +++ b/c_glib/test/test-assume-timezone-options.rb @@ -45,7 +45,6 @@ def test_nonexistent_property end def test_assume_timezone_function - omit("Missing tzdata on Windows") if Gem.win_platform? args = [ Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), ] diff --git a/c_glib/test/test-day-of-week-options.rb b/c_glib/test/test-day-of-week-options.rb index 8f76956fb4b..d1c254d3780 100644 --- a/c_glib/test/test-day-of-week-options.rb +++ b/c_glib/test/test-day-of-week-options.rb @@ -39,7 +39,6 @@ def test_week_start_property end def test_day_of_week_function_with_count_from_zero_false - omit("Missing tzdata on Windows") if Gem.win_platform? args = [ # 2017-09-09T10:33:10Z (Saturday) Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), @@ -51,7 +50,6 @@ def test_day_of_week_function_with_count_from_zero_false end def test_day_of_week_function_with_week_start - omit("Missing tzdata on Windows") if Gem.win_platform? 
args = [ # 2017-09-09T10:33:10Z (Saturday) Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), diff --git a/c_glib/test/test-strftime-options.rb b/c_glib/test/test-strftime-options.rb index 81440d5d086..aafcca98e42 100644 --- a/c_glib/test/test-strftime-options.rb +++ b/c_glib/test/test-strftime-options.rb @@ -35,7 +35,6 @@ def test_locale_property end def test_strftime_function - omit("Missing tzdata on Windows") if Gem.win_platform? args = [ Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190854])), ] diff --git a/ci/scripts/download_tz_database.sh b/ci/scripts/download_tz_database.sh index b74d251a43b..4fc9d857ea0 100755 --- a/ci/scripts/download_tz_database.sh +++ b/ci/scripts/download_tz_database.sh @@ -17,6 +17,9 @@ # specific language governing permissions and limitations # under the License. +# Downloads IANA timezone database for use with the vendored date library +# on Windows when not using MSVC (e.g., MinGW builds). + set -ex # Download database diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc index b7d017d4820..7371b0ab866 100644 --- a/cpp/src/arrow/compute/function_test.cc +++ b/cpp/src/arrow/compute/function_test.cc @@ -95,11 +95,9 @@ TEST(FunctionOptions, Equality) { options.emplace_back(new StrptimeOptions("%Y", TimeUnit::type::MILLI, true)); options.emplace_back(new StrptimeOptions("%Y", TimeUnit::type::NANO)); options.emplace_back(new StrftimeOptions("%Y-%m-%dT%H:%M:%SZ", "C")); -#ifndef _WIN32 options.emplace_back(new AssumeTimezoneOptions( "Europe/Amsterdam", AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_RAISE, AssumeTimezoneOptions::Nonexistent::NONEXISTENT_RAISE)); -#endif options.emplace_back(new PadOptions(5, " ")); options.emplace_back(new PadOptions(10, "A")); options.emplace_back(new PadOptions(10, "A", false)); diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 2589756a073..e6f9cd357bf 100644 --- 
a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2358,15 +2358,7 @@ constexpr char kTimestampSecondsJson[] = constexpr char kTimestampExtremeJson[] = R"(["1677-09-20T00:00:59.123456", "2262-04-13T23:23:23.999999"])"; -class CastTimezone : public ::testing::Test { - protected: - void SetUp() override { -#ifdef _WIN32 - // Initialize timezone database on Windows - ASSERT_OK(InitTestTimezoneDatabase()); -#endif - } -}; +class CastTimezone : public ::testing::Test {}; TEST(Cast, TimestampToDate) { // See scalar_temporal_test.cc diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index cdc1141fce8..c2257c80e65 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -412,14 +412,6 @@ class ScalarTemporalTest : public ::testing::Test { RoundTemporalOptions round_to_15_quarters = RoundTemporalOptions(15, CalendarUnit::QUARTER); RoundTemporalOptions round_to_15_years = RoundTemporalOptions(15, CalendarUnit::YEAR); - - protected: - void SetUp() override { -#ifdef _WIN32 - // Initialize timezone database on Windows - ASSERT_OK(InitTestTimezoneDatabase()); -#endif - } }; class ScalarTemporalTestStrictCeil : public ScalarTemporalTest { diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index a0e3a079b31..e6b5707be32 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -64,8 +64,6 @@ std::string MakeSimdLevelString(QueryFlagFunction&& query_flag) { } } -std::optional timezone_db_path; - }; // namespace const BuildInfo& GetBuildInfo() { return kBuildInfo; } @@ -77,32 +75,7 @@ RuntimeInfo GetRuntimeInfo() { MakeSimdLevelString([&](int64_t flags) { return cpu_info->IsSupported(flags); }); info.detected_simd_level = MakeSimdLevelString([&](int64_t flags) { return cpu_info->IsDetected(flags); }); - info.using_os_timezone_db = USE_OS_TZDB; -#if 
!USE_OS_TZDB - info.timezone_db_path = timezone_db_path; -#else - info.timezone_db_path = std::optional(); -#endif return info; } -Status Initialize(const GlobalOptions& options) noexcept { - if (options.timezone_db_path.has_value()) { -#if !USE_OS_TZDB - try { - arrow_vendored::date::set_install(options.timezone_db_path.value()); - arrow_vendored::date::reload_tzdb(); - } catch (const std::runtime_error& e) { - return Status::IOError(e.what()); - } - timezone_db_path = options.timezone_db_path.value(); -#else - return Status::Invalid( - "Arrow was set to use OS timezone database at compile time, " - "so a downloaded database cannot be provided at runtime."); -#endif // !USE_OS_TZDB - } - return Status::OK(); -} - } // namespace arrow diff --git a/cpp/src/arrow/config.h b/cpp/src/arrow/config.h index 617d6c268b5..876fdbd484d 100644 --- a/cpp/src/arrow/config.h +++ b/cpp/src/arrow/config.h @@ -64,13 +64,6 @@ struct RuntimeInfo { /// The SIMD level available on the OS and CPU std::string detected_simd_level; - - /// Whether using the OS-based timezone database - /// This is set at compile-time. - bool using_os_timezone_db; - - /// The path to the timezone database; by default None. - std::optional timezone_db_path; }; /// \brief Get runtime build info. @@ -86,13 +79,4 @@ const BuildInfo& GetBuildInfo(); ARROW_EXPORT RuntimeInfo GetRuntimeInfo(); -struct GlobalOptions { - /// Path to text timezone database. This is only configurable on Windows, - /// which does not have a compatible OS timezone database. 
- std::optional timezone_db_path; -}; - -ARROW_EXPORT -Status Initialize(const GlobalOptions& options) noexcept; - } // namespace arrow diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index ccc80dc93a5..0b6608913a6 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -122,46 +122,4 @@ TEST(Misc, BuildInfo) { ASSERT_THAT(info.full_so_version, ::testing::HasSubstr(info.so_version)); } -TEST(Misc, SetTimezoneConfig) { -#ifndef _WIN32 - GTEST_SKIP() << "Can only set the Timezone database on Windows"; -#elif !defined(ARROW_FILESYSTEM) - GTEST_SKIP() << "Need filesystem support to test timezone config."; -#else - auto fs = std::make_shared(); - - std::optional tzdata_result = GetTestTimezoneDatabaseRoot(); - std::string tzdata_dir; - if (tzdata_result.has_value()) { - tzdata_dir = tzdata_result.value(); - } else { - auto home_raw = std::getenv("USERPROFILE"); - std::string home = home_raw == nullptr ? "~" : std::string(home_raw); - ASSERT_OK_AND_ASSIGN(tzdata_dir, fs->NormalizePath(home + "\\Downloads\\tzdata")); - } - ASSERT_OK_AND_ASSIGN(tzdata_dir, fs->NormalizePath(tzdata_dir)); - ASSERT_OK_AND_ASSIGN(auto tzdata_path, - arrow::internal::PlatformFilename::FromString(tzdata_dir)); - - if (!arrow::internal::FileExists(tzdata_path).ValueOr(false)) { - GTEST_SKIP() << "Couldn't find timezone database in expected dir: " << tzdata_dir; - } - // Create a tmp directory - ASSERT_OK_AND_ASSIGN(auto tempdir, arrow::internal::TemporaryDir::Make("tzdata")); - - // Validate that setting tzdb to that dir fails - arrow::GlobalOptions options = {std::make_optional(tempdir->path().ToString())}; - ASSERT_NOT_OK(arrow::Initialize(options)); - - // Copy tzdb data from ~/Downloads - auto selector = arrow::fs::FileSelector(); - selector.base_dir = tzdata_dir; - selector.recursive = true; - ASSERT_OK(arrow::fs::CopyFiles(fs, selector, fs, tempdir->path().ToString())); - - // Validate that tzdb is working - 
ASSERT_OK(arrow::Initialize(options)); -#endif -} - } // namespace arrow diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index b0c8deae36c..8846347e1c1 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -122,25 +122,6 @@ Status GetTestResourceRoot(std::string* out) { return Status::OK(); } -std::optional GetTestTimezoneDatabaseRoot() { - const char* c_root = std::getenv("ARROW_TIMEZONE_DATABASE"); - if (!c_root) { - return std::optional(); - } - return std::make_optional(std::string(c_root)); -} - -Status InitTestTimezoneDatabase() { - auto maybe_tzdata = GetTestTimezoneDatabaseRoot(); - // If missing, timezone database will default to %USERPROFILE%\Downloads\tzdata - if (!maybe_tzdata.has_value()) return Status::OK(); - - auto tzdata_path = std::string(maybe_tzdata.value()); - arrow::GlobalOptions options = {std::make_optional(tzdata_path)}; - ARROW_RETURN_NOT_OK(arrow::Initialize(options)); - return Status::OK(); -} - int GetListenPort() { // Get a new available port number by binding a socket to an ephemeral port // and then closing it. Since ephemeral port allocation tends to avoid diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h index c2d6ca4d156..98b1bdb134e 100644 --- a/cpp/src/arrow/testing/util.h +++ b/cpp/src/arrow/testing/util.h @@ -112,13 +112,6 @@ UnionTypeFactories() { // Status ARROW_TESTING_EXPORT Status GetTestResourceRoot(std::string*); -// Return the value of the ARROW_TIMEZONE_DATABASE environment variable -ARROW_TESTING_EXPORT std::optional GetTestTimezoneDatabaseRoot(); - -// Set the Timezone database based on the ARROW_TIMEZONE_DATABASE env variable -// This is only relevant on Windows, since other OSs have compatible databases built-in -ARROW_TESTING_EXPORT Status InitTestTimezoneDatabase(); - // Get a TCP port number to listen on. This is a different number every time, // as reusing the same port across tests can produce spurious bind errors on // Windows. 
diff --git a/dev/tasks/vcpkg-tests/github.windows.yml b/dev/tasks/vcpkg-tests/github.windows.yml index 818bd771182..124482b8555 100644 --- a/dev/tasks/vcpkg-tests/github.windows.yml +++ b/dev/tasks/vcpkg-tests/github.windows.yml @@ -35,9 +35,6 @@ jobs: run: | arrow/ci/scripts/install_cmake.sh 3.29.0 /c/cmake echo "c:\\cmake\\bin" >> $GITHUB_PATH - - name: Download Timezone Database - shell: bash - run: arrow/ci/scripts/download_tz_database.sh - name: Remove and Reinstall vcpkg # When running vcpkg in GitHub Actions on Windows, remove the # preinstalled vcpkg and install the newest version from source. diff --git a/dev/tasks/verify-rc/github.win.yml b/dev/tasks/verify-rc/github.win.yml index 7d8c28d9315..d57d88d7e45 100644 --- a/dev/tasks/verify-rc/github.win.yml +++ b/dev/tasks/verify-rc/github.win.yml @@ -40,10 +40,6 @@ jobs: choco install boost-msvc-14.1 choco install wget - - name: Download Timezone Database - shell: bash - run: arrow/ci/scripts/download_tz_database.sh - - name: Run verification shell: cmd run: | diff --git a/docs/source/cpp/build_system.rst b/docs/source/cpp/build_system.rst index 01dbe5e45f8..b124060053b 100644 --- a/docs/source/cpp/build_system.rst +++ b/docs/source/cpp/build_system.rst @@ -228,26 +228,3 @@ can control the source of each dependency and whether it is statically or dynamically linked. See :doc:`/developers/cpp/building` for instructions. Or alternatively, use Arrow from a package manager such as Conda or vcpkg which will manage consistent versions of Arrow and its dependencies. - - -.. _download-timezone-database: - -Runtime Dependencies -==================== - -While Arrow uses the OS-provided timezone database on Linux and macOS, it -requires a user-provided database on Windows. You must download and extract the -text version of the IANA timezone database and add the Windows timezone mapping -XML. To download, you can use the following batch script: - -.. 
literalinclude:: ../../../ci/appveyor-cpp-setup.bat - :language: batch - :start-after: @rem (Doc section: Download timezone database) - :end-before: @rem (Doc section: Download timezone database) - -By default, the timezone database will be detected at ``%USERPROFILE%\Downloads\tzdata``, -but you can set a custom path at runtime in :struct:`arrow::ArrowGlobalOptions`:: - - arrow::GlobalOptions options; - options.timezone_db_path = "path/to/tzdata"; - ARROW_RETURN_NOT_OK(arrow::Initialize(options)); diff --git a/docs/source/developers/cpp/windows.rst b/docs/source/developers/cpp/windows.rst index 21bde92d0b7..b4d8f19dc26 100644 --- a/docs/source/developers/cpp/windows.rst +++ b/docs/source/developers/cpp/windows.rst @@ -381,15 +381,6 @@ be defined, and similarly for ``-DARROW_FLIGHT_SQL=ON``. ARROW_FLIGHT_STATIC ARROW_FLIGHT_SQL_STATIC) -Downloading the Timezone Database -================================= - -To run some of the compute unit tests on Windows, the IANA timezone database -and the Windows timezone mapping need to be downloaded first. See -:ref:`download-timezone-database` for download instructions. To set a non-default -path for the timezone database while running the unit tests, set the -``ARROW_TIMEZONE_DATABASE`` environment variable. - Replicating Appveyor Builds =========================== diff --git a/docs/source/python/install.rst b/docs/source/python/install.rst index c6f098ee20a..b948905df78 100644 --- a/docs/source/python/install.rst +++ b/docs/source/python/install.rst @@ -80,39 +80,6 @@ Optional dependencies Additional packages PyArrow is compatible with are :ref:`fsspec ` and **pytz**, **dateutil** or **tzdata** package for timezones. -tzdata on Windows -^^^^^^^^^^^^^^^^^ - -While Arrow uses the OS-provided timezone database on Linux and macOS, it requires a -user-provided database on Windows. 
To download and extract the text version of -the IANA timezone database follow the instructions in the C++ -:ref:`download-timezone-database` or use pyarrow utility function -``pyarrow.util.download_tzdata_on_windows()`` that does the same. - -By default, the timezone database will be detected at ``%USERPROFILE%\Downloads\tzdata``. -If the database has been downloaded in a different location, you will need to set -a custom path to the database from Python: - -.. code-block:: python - - >>> import pyarrow as pa - >>> pa.set_timezone_db_path("custom_path") - -You may encounter problems writing datetime data to an ORC file if you install -pyarrow with pip. One possible solution to fix this problem: - - 1. Install tzdata with ``pip install tzdata`` - 2. Set the environment variable ``TZDIR = path\to\.venv\Lib\site-packages\tzdata\`` - -You can find where ``tzdata`` is installed with the following python -command: - -.. code-block:: python - - >>> import tzdata - >>> print(tzdata.__file__) - path\to\.venv\Lib\site-packages\tzdata\__init__.py - .. _python-conda-differences: diff --git a/python/pyarrow/config.pxi b/python/pyarrow/config.pxi index 1f8047d1bd0..4fdaaf0bdb9 100644 --- a/python/pyarrow/config.pxi +++ b/python/pyarrow/config.pxi @@ -96,21 +96,3 @@ build_info = _build_info() cpp_build_info = build_info.cpp_build_info cpp_version = build_info.cpp_build_info.version cpp_version_info = build_info.cpp_build_info.version_info - - -def set_timezone_db_path(path): - """ - Configure the path to text timezone database on Windows. - - Parameters - ---------- - path : str - Path to text timezone database. 
- """ - cdef: - CGlobalOptions options - - if path is not None: - options.timezone_db_path = tobytes(path) - - check_status(Initialize(options)) diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py index 41beaa14041..87c6bf91c8d 100644 --- a/python/pyarrow/conftest.py +++ b/python/pyarrow/conftest.py @@ -22,7 +22,6 @@ from pyarrow import Codec from pyarrow import fs from pyarrow.lib import is_threading_enabled -from pyarrow.tests.util import windows_has_tzdata import sys @@ -108,9 +107,7 @@ defaults['processes'] = False defaults['sockets'] = False -if sys.platform == "win32": - defaults['timezone_data'] = windows_has_tzdata() -elif sys.platform == "emscripten": +if sys.platform == "emscripten": defaults['timezone_data'] = os.path.exists("/usr/share/zoneinfo") try: diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index e96a7d84696..897ead17397 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -90,11 +90,6 @@ cdef extern from "arrow/config.h" namespace "arrow" nogil: CRuntimeInfo GetRuntimeInfo() - cdef cppclass CGlobalOptions" arrow::GlobalOptions": - optional[c_string] timezone_db_path - - CStatus Initialize(const CGlobalOptions& options) - cdef extern from "arrow/util/future.h" namespace "arrow" nogil: cdef cppclass CFuture_Void" arrow::Future<>": diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py index 575444c1cfc..50c194694c2 100644 --- a/python/pyarrow/tests/conftest.py +++ b/python/pyarrow/tests/conftest.py @@ -19,7 +19,6 @@ import os import pathlib import subprocess -import sys import time import urllib.request @@ -28,7 +27,6 @@ from ..conftest import groups, defaults -from pyarrow import set_timezone_db_path from pyarrow.util import find_free_port @@ -49,28 +47,6 @@ os.environ['AWS_CONFIG_FILE'] = "/dev/null" -if sys.platform == 'win32': - tzdata_set_path = os.environ.get('PYARROW_TZDATA_PATH', None) - if tzdata_set_path: - 
set_timezone_db_path(tzdata_set_path) - - -# GH-45295: For ORC, try to populate TZDIR env var from tzdata package resource -# path. -# -# Note this is a different kind of database than what we allow to be set by -# `PYARROW_TZDATA_PATH` and passed to set_timezone_db_path. -if sys.platform == 'win32': - if os.environ.get('TZDIR', None) is None: - from importlib import resources - try: - os.environ['TZDIR'] = os.path.join(resources.files('tzdata'), 'zoneinfo') - except ModuleNotFoundError: - print( - 'Package "tzdata" not found. Not setting TZDIR environment variable.' - ) - - def pytest_addoption(parser): # Create options to selectively enable test groups def bool_env(name, default=None): diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index 3c31650ddf9..f18d58b097e 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -327,7 +327,7 @@ def arrays(draw, type, size=None, nullable=True): value = st.dates() elif pa.types.is_timestamp(ty): if zoneinfo is None: - pytest.skip('no module named zoneinfo (or tzdata on Windows)') + pytest.skip('no module named zoneinfo') if ty.tz is None: pytest.skip('requires timezone not None') min_int64 = -(2**63) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index d8a1c4d093e..713ea22b6fb 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -203,14 +203,14 @@ def test_option_class_equality(request): first_week_is_fully_in_year=False), pc.ZeroFillOptions(4, "0"), ] - # Timezone database might not be installed on Windows or Emscripten + # Timezone database might not be installed on Emscripten if request.config.pyarrow.is_enabled["timezone_data"]: options.append(pc.AssumeTimezoneOptions("Europe/Ljubljana")) classes = {type(option) for option in options} for cls in exported_option_classes: - # Timezone database might not be installed on Windows or Emscripten + # Timezone database might 
not be installed on Emscripten if ( cls not in classes and (request.config.pyarrow.is_enabled["timezone_data"]) @@ -2547,7 +2547,7 @@ def test_extract_datetime_components(request): # Test timezone aware timestamp array if not request.config.pyarrow.is_enabled["timezone_data"]: - pytest.skip('Timezone database is not installed on Windows') + pytest.skip('Timezone database is not available') else: for timezone in timezones: _check_datetime_components(timestamps, timezone) diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index 64f45d8bed8..fb73d654ae3 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -22,7 +22,6 @@ import pytest import pyarrow as pa -from pyarrow.lib import ArrowInvalid def test_get_include(): @@ -138,17 +137,6 @@ def import_arrow(): subprocess.check_call([sys.executable, "-c", code]) -@pytest.mark.skipif(sys.platform == "win32", - reason="Path to timezone database is not configurable " - "on non-Windows platforms") -def test_set_timezone_db_path_non_windows(): - # set_timezone_db_path raises an error on non-Windows platforms - with pytest.raises(ArrowInvalid, - match="Arrow was set to use OS timezone " - "database at compile time"): - pa.set_timezone_db_path("path") - - @pytest.mark.parametrize('klass', [ pa.Field, pa.Schema, diff --git a/python/pyarrow/tests/test_util.py b/python/pyarrow/tests/test_util.py index e584b041114..9fccb76112d 100644 --- a/python/pyarrow/tests/test_util.py +++ b/python/pyarrow/tests/test_util.py @@ -16,17 +16,14 @@ # under the License. 
import gc -import os import signal -import shutil import sys import textwrap import weakref import pytest -from pyarrow.util import (doc, _break_traceback_cycle_from_frame, - download_tzdata_on_windows) +from pyarrow.util import doc, _break_traceback_cycle_from_frame from pyarrow.tests.util import disabled_gc @@ -210,20 +207,3 @@ def test_signal_refcycle(): assert wr() is not None _break_traceback_cycle_from_frame(sys._getframe(0)) assert wr() is None - - -@pytest.mark.skipif(sys.platform != "win32", - reason="Timezone database is already provided.") -def test_download_tzdata_on_windows(): - tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata") - - # Download timezone database and remove data in case it already exists - if (os.path.exists(tzdata_path)): - shutil.rmtree(tzdata_path) - download_tzdata_on_windows() - - # Inspect the folder - assert os.path.exists(tzdata_path) - assert os.path.exists(os.path.join(tzdata_path, "windowsZones.xml")) - assert os.path.exists(os.path.join(tzdata_path, "europe")) - assert 'version' in os.listdir(tzdata_path) diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py index 7e3dd4324e9..cf48ac807be 100644 --- a/python/pyarrow/tests/util.py +++ b/python/pyarrow/tests/util.py @@ -427,21 +427,6 @@ def _configure_s3_limited_user(s3_server, policy, username, password): pytest.skip("Configuring limited s3 user failed") -def windows_has_tzdata(): - """ - This is the default location where tz.cpp will look for (until we make - this configurable at run-time) - """ - tzdata_bool = False - if "PYARROW_TZDATA_PATH" in os.environ: - tzdata_bool = os.path.exists(os.environ['PYARROW_TZDATA_PATH']) - if not tzdata_bool: - tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata") - tzdata_bool = os.path.exists(tzdata_path) - - return tzdata_bool - - def running_on_musllinux(): """ Checks whether it's running on musl systems or not. 
diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py index 5878d1f9026..a9827c36585 100644 --- a/python/pyarrow/util.py +++ b/python/pyarrow/util.py @@ -242,35 +242,3 @@ def _download_requests(url, out_path): with requests.get(url) as response: with open(out_path, 'wb') as f: f.write(response.content) - - -def download_tzdata_on_windows(): - r""" - Download and extract latest IANA timezone database into the - location expected by Arrow which is %USERPROFILE%\Downloads\tzdata. - """ - if sys.platform != 'win32': - raise TypeError(f"Timezone database is already provided by {sys.platform}") - - import tarfile - - tzdata_url = "https://data.iana.org/time-zones/tzdata-latest.tar.gz" - tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata") - tzdata_compressed_path = os.path.join(tzdata_path, "tzdata.tar.gz") - windows_zones_url = "https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml" # noqa - windows_zones_path = os.path.join(tzdata_path, "windowsZones.xml") - os.makedirs(tzdata_path, exist_ok=True) - - # Try to download the files with requests and then fall back to urllib. 
This - # works around possible issues in certain older environment (GH-45295) - try: - _download_requests(tzdata_url, tzdata_compressed_path) - _download_requests(windows_zones_url, windows_zones_path) - except ImportError: - _download_urllib(tzdata_url, tzdata_compressed_path) - _download_urllib(windows_zones_url, windows_zones_path) - - assert os.path.exists(tzdata_compressed_path) - assert os.path.exists(windows_zones_path) - - tarfile.open(tzdata_compressed_path).extractall(tzdata_path) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index a1167433c93..9e0bfe77974 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -152,9 +152,6 @@ s3_finalizer <- new.env(parent = emptyenv()) # Disable multithreading on Windows # See https://issues.apache.org/jira/browse/ARROW-8379 options(arrow.use_threads = FALSE) - - # Try to set timezone database - configure_tzdb() } # Set interrupt handlers @@ -171,20 +168,6 @@ s3_finalizer <- new.env(parent = emptyenv()) invisible() } -configure_tzdb <- function() { - # This is needed on Windows to support timezone-aware calculations - if (requireNamespace("tzdb", quietly = TRUE)) { - tzdb::tzdb_initialize() - set_timezone_database(tzdb::tzdb_path("text")) - } else { - msg <- paste( - "The tzdb package is not installed.", - "Timezones will not be available to Arrow compute functions." 
- ) - packageStartupMessage(msg) - } -} - .onAttach <- function(libname, pkgname) { # Just to be extra safe, let's wrap this in a try(); # we don't want a failed startup message to prevent the package from loading diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index a8387526b25..3f4d9aa4a87 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -552,10 +552,6 @@ runtime_info <- function() { .Call(`_arrow_runtime_info`) } -set_timezone_database <- function(path) { - invisible(.Call(`_arrow_set_timezone_database`, path)) -} - csv___WriteOptions__initialize <- function(options) { .Call(`_arrow_csv___WriteOptions__initialize`, options) } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 73bf81f83bb..bcf351c120f 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1382,15 +1382,6 @@ BEGIN_CPP11 return cpp11::as_sexp(runtime_info()); END_CPP11 } -// config.cpp -void set_timezone_database(cpp11::strings path); -extern "C" SEXP _arrow_set_timezone_database(SEXP path_sexp){ -BEGIN_CPP11 - arrow::r::Input::type path(path_sexp); - set_timezone_database(path); - return R_NilValue; -END_CPP11 -} // csv.cpp std::shared_ptr csv___WriteOptions__initialize(cpp11::list options); extern "C" SEXP _arrow_csv___WriteOptions__initialize(SEXP options_sexp){ @@ -5843,9 +5834,8 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_compute__GetFunctionNames", (DL_FUNC) &_arrow_compute__GetFunctionNames, 0}, { "_arrow_compute__Initialize", (DL_FUNC) &_arrow_compute__Initialize, 0}, { "_arrow_RegisterScalarUDF", (DL_FUNC) &_arrow_RegisterScalarUDF, 2}, - { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, - { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, - { "_arrow_set_timezone_database", (DL_FUNC) &_arrow_set_timezone_database, 1}, + { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, + { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, { "_arrow_csv___WriteOptions__initialize", (DL_FUNC) 
&_arrow_csv___WriteOptions__initialize, 1}, { "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, { "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, diff --git a/r/src/config.cpp b/r/src/config.cpp index a45df73a64a..1855f96ac6a 100644 --- a/r/src/config.cpp +++ b/r/src/config.cpp @@ -17,8 +17,6 @@ #include "./arrow_types.h" -#include - #include // [[arrow::export]] @@ -33,15 +31,3 @@ std::vector runtime_info() { auto info = arrow::GetRuntimeInfo(); return {info.simd_level, info.detected_simd_level}; } - -// [[arrow::export]] -void set_timezone_database(cpp11::strings path) { - auto paths = cpp11::as_cpp>(path); - if (path.size() != 1) { - cpp11::stop("Must provide a single path to the timezone database."); - } - - arrow::GlobalOptions options; - options.timezone_db_path = std::make_optional(paths[0]); - arrow::StopIfNotOk(arrow::Initialize(options)); -} From 65eda10dcd563b00ef3acb0c138fa847fdde2618 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 Jan 2026 13:44:04 +0100 Subject: [PATCH 09/29] Allow gcc bug on windows --- .github/workflows/cpp.yml | 3 --- .pre-commit-config.yaml | 1 - ci/scripts/download_tz_database.sh | 33 ---------------------------- cpp/src/arrow/util/chrono_internal.h | 14 +++++++----- 4 files changed, 8 insertions(+), 43 deletions(-) delete mode 100755 ci/scripts/download_tz_database.sh diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 6612308afc3..36476395593 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -348,9 +348,6 @@ jobs: with: fetch-depth: 0 submodules: recursive - - name: Download Timezone Database - shell: bash - run: ci/scripts/download_tz_database.sh - uses: msys2/setup-msys2@v2 with: msystem: ${{ matrix.msystem_upper }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c4c4f04188d..340f166fc6d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -302,7 
+302,6 @@ repos: ?^ci/scripts/conan_build\.sh$| ?^ci/scripts/conan_setup\.sh$| ?^ci/scripts/cpp_test\.sh$| - ?^ci/scripts/download_tz_database\.sh$| ?^ci/scripts/install_azurite\.sh$| ?^ci/scripts/install_ccache\.sh$| ?^ci/scripts/install_ceph\.sh$| diff --git a/ci/scripts/download_tz_database.sh b/ci/scripts/download_tz_database.sh deleted file mode 100755 index 4fc9d857ea0..00000000000 --- a/ci/scripts/download_tz_database.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Downloads IANA timezone database for use with the vendored date library -# on Windows when not using MSVC (e.g., MinGW builds). 
- -set -ex - -# Download database -curl https://data.iana.org/time-zones/releases/tzdata2024b.tar.gz --output ~/Downloads/tzdata.tar.gz - -# Extract -mkdir -p ~/Downloads/tzdata -tar --extract --file ~/Downloads/tzdata.tar.gz --directory ~/Downloads/tzdata - -# Download Windows timezone mapping -curl https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml --output ~/Downloads/tzdata/windowsZones.xml diff --git a/cpp/src/arrow/util/chrono_internal.h b/cpp/src/arrow/util/chrono_internal.h index 986beb3f3a8..3c962f58cfb 100644 --- a/cpp/src/arrow/util/chrono_internal.h +++ b/cpp/src/arrow/util/chrono_internal.h @@ -36,14 +36,16 @@ // We only enable for compilers with FULL support (not partial) // https://en.cppreference.com/w/cpp/compiler_support/20.html#cpp_lib_chrono_201907L // -// MSVC 19.29+ (VS16.10+): Full C++20 chrono support, uses Windows internal TZ database. -// GCC libstdc++ has a bug where DST state is incorrectly reset when a timezone -// transitions between rule sets in tzdata.zi (e.g., Australia/Broken_Hill around -// 2000-02-29 23:23:24). -// Until this is fixed, we use the vendored date.h library for GCC. +// On non-Windows: GCC libstdc++ has a bug where DST state is incorrectly reset when +// a timezone transitions between rule sets (e.g., Australia/Broken_Hill around +// 2000-02-29). Until this is fixed, we use the vendored date.h library. // See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 +// +// On Windows: Use std::chrono which accesses Windows' internal timezone database, +// eliminating the need for users to install IANA tzdata separately. We tolerate +// the GCC bug here since Windows users are less likely to be using GCC. 
-#if defined(_MSC_VER) && defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907L +#if defined(_WIN32) && defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907L # define ARROW_USE_STD_CHRONO 1 #else # define ARROW_USE_STD_CHRONO 0 From dff653e7192df55b2d3f9fd317024a5b7595823d Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 Jan 2026 13:52:14 +0100 Subject: [PATCH 10/29] Fix verify RC step --- .github/workflows/verify_rc.yml | 3 +++ .pre-commit-config.yaml | 1 + ci/scripts/download_tz_database.sh | 33 ++++++++++++++++++++++++++++++ dev/tasks/verify-rc/github.win.yml | 4 ++++ 4 files changed, 41 insertions(+) create mode 100755 ci/scripts/download_tz_database.sh diff --git a/.github/workflows/verify_rc.yml b/.github/workflows/verify_rc.yml index 04507cb3972..e88b8ca8f23 100644 --- a/.github/workflows/verify_rc.yml +++ b/.github/workflows/verify_rc.yml @@ -228,6 +228,9 @@ jobs: run: | choco install --no-progress --yes boost-msvc-14.1 choco install --no-progress --yes wget + - name: Download Timezone Database + shell: bash + run: ci/scripts/download_tz_database.sh - name: Run verification env: GH_TOKEN: ${{ github.token }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 340f166fc6d..c4c4f04188d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -302,6 +302,7 @@ repos: ?^ci/scripts/conan_build\.sh$| ?^ci/scripts/conan_setup\.sh$| ?^ci/scripts/cpp_test\.sh$| + ?^ci/scripts/download_tz_database\.sh$| ?^ci/scripts/install_azurite\.sh$| ?^ci/scripts/install_ccache\.sh$| ?^ci/scripts/install_ceph\.sh$| diff --git a/ci/scripts/download_tz_database.sh b/ci/scripts/download_tz_database.sh new file mode 100755 index 00000000000..4fc9d857ea0 --- /dev/null +++ b/ci/scripts/download_tz_database.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Downloads IANA timezone database for use with the vendored date library +# on Windows when not using MSVC (e.g., MinGW builds). + +set -ex + +# Download database +curl https://data.iana.org/time-zones/releases/tzdata2024b.tar.gz --output ~/Downloads/tzdata.tar.gz + +# Extract +mkdir -p ~/Downloads/tzdata +tar --extract --file ~/Downloads/tzdata.tar.gz --directory ~/Downloads/tzdata + +# Download Windows timezone mapping +curl https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml --output ~/Downloads/tzdata/windowsZones.xml diff --git a/dev/tasks/verify-rc/github.win.yml b/dev/tasks/verify-rc/github.win.yml index d57d88d7e45..7d8c28d9315 100644 --- a/dev/tasks/verify-rc/github.win.yml +++ b/dev/tasks/verify-rc/github.win.yml @@ -40,6 +40,10 @@ jobs: choco install boost-msvc-14.1 choco install wget + - name: Download Timezone Database + shell: bash + run: arrow/ci/scripts/download_tz_database.sh + - name: Run verification shell: cmd run: | From 7bc13355495b1172ff340de31ffb9222c299d5cc Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 Jan 2026 14:38:56 +0100 Subject: [PATCH 11/29] Fix R's tzdb --- c_glib/test/test-assume-timezone-options.rb | 1 + c_glib/test/test-day-of-week-options.rb | 2 ++ c_glib/test/test-strftime-options.rb | 1 + 
cpp/src/arrow/config.cc | 14 ++++++++++++++ cpp/src/arrow/config.h | 9 +++++++++ cpp/src/arrow/util/chrono_internal.h | 11 ++++++----- r/R/arrow-package.R | 19 +++++++++++++++++++ r/R/arrowExports.R | 4 ++++ r/src/arrowExports.cpp | 10 ++++++++++ r/src/config.cpp | 14 ++++++++++++++ 10 files changed, 80 insertions(+), 5 deletions(-) diff --git a/c_glib/test/test-assume-timezone-options.rb b/c_glib/test/test-assume-timezone-options.rb index d60935964d7..097efc0b04b 100644 --- a/c_glib/test/test-assume-timezone-options.rb +++ b/c_glib/test/test-assume-timezone-options.rb @@ -45,6 +45,7 @@ def test_nonexistent_property end def test_assume_timezone_function + omit("std::chrono not available on Windows MinGW") if Gem.win_platform? args = [ Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), ] diff --git a/c_glib/test/test-day-of-week-options.rb b/c_glib/test/test-day-of-week-options.rb index d1c254d3780..85ac116c04d 100644 --- a/c_glib/test/test-day-of-week-options.rb +++ b/c_glib/test/test-day-of-week-options.rb @@ -39,6 +39,7 @@ def test_week_start_property end def test_day_of_week_function_with_count_from_zero_false + omit("std::chrono not available on Windows MinGW") if Gem.win_platform? args = [ # 2017-09-09T10:33:10Z (Saturday) Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), @@ -50,6 +51,7 @@ def test_day_of_week_function_with_count_from_zero_false end def test_day_of_week_function_with_week_start + omit("std::chrono not available on Windows MinGW") if Gem.win_platform? 
args = [ # 2017-09-09T10:33:10Z (Saturday) Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), diff --git a/c_glib/test/test-strftime-options.rb b/c_glib/test/test-strftime-options.rb index aafcca98e42..93c3e0a5dec 100644 --- a/c_glib/test/test-strftime-options.rb +++ b/c_glib/test/test-strftime-options.rb @@ -35,6 +35,7 @@ def test_locale_property end def test_strftime_function + omit("std::chrono not available on Windows MinGW") if Gem.win_platform? args = [ Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190854])), ] diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index e6b5707be32..90b8b95d929 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -78,4 +78,18 @@ RuntimeInfo GetRuntimeInfo() { return info; } +Status Initialize(const GlobalOptions& options) noexcept { + if (options.timezone_db_path.has_value()) { +#if !USE_OS_TZDB + try { + arrow_vendored::date::set_install(options.timezone_db_path.value()); + arrow_vendored::date::reload_tzdb(); + } catch (const std::runtime_error& e) { + return Status::IOError(e.what()); + } +#endif + } + return Status::OK(); +} + } // namespace arrow diff --git a/cpp/src/arrow/config.h b/cpp/src/arrow/config.h index 876fdbd484d..c3d027944be 100644 --- a/cpp/src/arrow/config.h +++ b/cpp/src/arrow/config.h @@ -79,4 +79,13 @@ const BuildInfo& GetBuildInfo(); ARROW_EXPORT RuntimeInfo GetRuntimeInfo(); +struct GlobalOptions { + /// Path to text timezone database. This is only used on Windows MinGW + /// builds where std::chrono timezone support is not available. 
+ std::optional timezone_db_path; +}; + +ARROW_EXPORT +Status Initialize(const GlobalOptions& options) noexcept; + } // namespace arrow diff --git a/cpp/src/arrow/util/chrono_internal.h b/cpp/src/arrow/util/chrono_internal.h index 3c962f58cfb..5a86af459c8 100644 --- a/cpp/src/arrow/util/chrono_internal.h +++ b/cpp/src/arrow/util/chrono_internal.h @@ -33,17 +33,18 @@ #include // Feature detection for C++20 chrono timezone support -// We only enable for compilers with FULL support (not partial) // https://en.cppreference.com/w/cpp/compiler_support/20.html#cpp_lib_chrono_201907L // +// On Windows with MSVC: std::chrono uses Windows' internal timezone database, +// eliminating the need for users to install IANA tzdata separately. +// +// On Windows with MinGW/GCC: libstdc++ reads tzdata files via TZDIR env var. +// The tzdata files must be provided (e.g., via the tzdb R package). +// // On non-Windows: GCC libstdc++ has a bug where DST state is incorrectly reset when // a timezone transitions between rule sets (e.g., Australia/Broken_Hill around // 2000-02-29). Until this is fixed, we use the vendored date.h library. // See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 -// -// On Windows: Use std::chrono which accesses Windows' internal timezone database, -// eliminating the need for users to install IANA tzdata separately. We tolerate -// the GCC bug here since Windows users are less likely to be using GCC. 
#if defined(_WIN32) && defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907L # define ARROW_USE_STD_CHRONO 1 diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 9e0bfe77974..1c9d2804f30 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -152,6 +152,9 @@ s3_finalizer <- new.env(parent = emptyenv()) # Disable multithreading on Windows # See https://issues.apache.org/jira/browse/ARROW-8379 options(arrow.use_threads = FALSE) + + # Try to set timezone database for MinGW builds + configure_tzdb() } # Set interrupt handlers @@ -168,6 +171,22 @@ s3_finalizer <- new.env(parent = emptyenv()) invisible() } +configure_tzdb <- function() { + # This is needed on Windows MinGW builds where std::chrono timezone support + # is not available (older GCC versions). The tzdb R package provides the + # IANA timezone database. + if (requireNamespace("tzdb", quietly = TRUE)) { + tzdb::tzdb_initialize() + set_timezone_database(tzdb::tzdb_path("text")) + } else { + msg <- paste( + "The tzdb package is not installed.", + "Timezones will not be available to Arrow compute functions." 
+ ) + packageStartupMessage(msg) + } +} + .onAttach <- function(libname, pkgname) { # Just to be extra safe, let's wrap this in a try(); # we don't want a failed startup message to prevent the package from loading diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 3f4d9aa4a87..a8387526b25 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -552,6 +552,10 @@ runtime_info <- function() { .Call(`_arrow_runtime_info`) } +set_timezone_database <- function(path) { + invisible(.Call(`_arrow_set_timezone_database`, path)) +} + csv___WriteOptions__initialize <- function(options) { .Call(`_arrow_csv___WriteOptions__initialize`, options) } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index bcf351c120f..0fa62edc86c 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1382,6 +1382,15 @@ BEGIN_CPP11 return cpp11::as_sexp(runtime_info()); END_CPP11 } +// config.cpp +void set_timezone_database(cpp11::strings path); +extern "C" SEXP _arrow_set_timezone_database(SEXP path_sexp){ +BEGIN_CPP11 + arrow::r::Input::type path(path_sexp); + set_timezone_database(path); + return R_NilValue; +END_CPP11 +} // csv.cpp std::shared_ptr csv___WriteOptions__initialize(cpp11::list options); extern "C" SEXP _arrow_csv___WriteOptions__initialize(SEXP options_sexp){ @@ -5836,6 +5845,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_RegisterScalarUDF", (DL_FUNC) &_arrow_RegisterScalarUDF, 2}, { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, + { "_arrow_set_timezone_database", (DL_FUNC) &_arrow_set_timezone_database, 1}, { "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1}, { "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, { "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, diff --git a/r/src/config.cpp b/r/src/config.cpp index 
1855f96ac6a..a45df73a64a 100644 --- a/r/src/config.cpp +++ b/r/src/config.cpp @@ -17,6 +17,8 @@ #include "./arrow_types.h" +#include + #include // [[arrow::export]] @@ -31,3 +33,15 @@ std::vector runtime_info() { auto info = arrow::GetRuntimeInfo(); return {info.simd_level, info.detected_simd_level}; } + +// [[arrow::export]] +void set_timezone_database(cpp11::strings path) { + auto paths = cpp11::as_cpp>(path); + if (path.size() != 1) { + cpp11::stop("Must provide a single path to the timezone database."); + } + + arrow::GlobalOptions options; + options.timezone_db_path = std::make_optional(paths[0]); + arrow::StopIfNotOk(arrow::Initialize(options)); +} From 38b93310702293a0fdaf906672139e3d2c99f0f3 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 Jan 2026 18:27:51 +0100 Subject: [PATCH 12/29] Skip failing tests (due to gcc bug) --- .../compute/kernels/scalar_temporal_test.cc | 30 +++++++++++++++++ cpp/src/arrow/config.cc | 2 ++ cpp/src/arrow/config.h | 4 +++ python/pyarrow/tests/test_compute.py | 32 ++++++++++++++++--- r/R/arrow-package.R | 4 +++ r/src/config.cpp | 2 ++ 6 files changed, 70 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index c2257c80e65..0161610d92a 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -709,6 +709,12 @@ TEST_F(ScalarTemporalTest, TestIsLeapYear) { } TEST_F(ScalarTemporalTest, TestZoned1) { + // TODO(GH-48743): Re-enable when GCC bug is fixed + // https://github.com/apache/arrow/issues/48743 + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 +#if defined(_WIN32) && !defined(_MSC_VER) + GTEST_SKIP() << "Test triggers GCC bug TODO(GH-48743)."; +#endif std::vector timezones = {"Pacific/Marquesas", "-09:30"}; for (const auto& timezone : timezones) { auto unit = timestamp(TimeUnit::NANO, timezone); @@ -807,6 +813,12 @@ TEST_F(ScalarTemporalTest, 
TestZoned1) { } TEST_F(ScalarTemporalTest, TestZoned2) { + // TODO(GH-48743): Re-enable when GCC bug is fixed + // https://github.com/apache/arrow/issues/48743 + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 +#if defined(_WIN32) && !defined(_MSC_VER) + GTEST_SKIP() << "Test triggers GCC bug TODO(GH-48743)."; +#endif for (auto u : TimeUnit::values()) { auto unit = timestamp(u, "Australia/Broken_Hill"); auto month = "[1, 3, 1, 5, 1, 12, 12, 12, 1, 1, 1, 1, 12, 12, 12, 1, null]"; @@ -2768,6 +2780,12 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, CeilUTC) { } TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, CeilZoned) { + // TODO(GH-48743): Re-enable when GCC bug is fixed + // https://github.com/apache/arrow/issues/48743 + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 +#if defined(_WIN32) && !defined(_MSC_VER) + GTEST_SKIP() << "Test triggers GCC bug TODO(GH-48743)."; +#endif std::string op = "ceil_temporal"; // Data for tests below was generated via lubridate with the exception @@ -3158,6 +3176,12 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, FloorUTC) { } TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, FloorZoned) { + // TODO(GH-48743): Re-enable when GCC bug is fixed + // https://github.com/apache/arrow/issues/48743 + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 +#if defined(_WIN32) && !defined(_MSC_VER) + GTEST_SKIP() << "Test triggers GCC bug TODO(GH-48743)."; +#endif std::string op = "floor_temporal"; // Data for tests below was generated via lubridate with the exception @@ -3591,6 +3615,12 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, RoundUTC) { } TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, RoundZoned) { + // TODO(GH-48743): Re-enable when GCC bug is fixed + // https://github.com/apache/arrow/issues/48743 + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 +#if defined(_WIN32) && !defined(_MSC_VER) + GTEST_SKIP() << "Test triggers GCC bug TODO(GH-48743)."; +#endif std::string op = 
"round_temporal"; // Data for tests below was generated via lubridate with the exception diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index 90b8b95d929..b2f7a385e38 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -78,6 +78,8 @@ RuntimeInfo GetRuntimeInfo() { return info; } +// TODO(GH-48743): Remove when RTools upgrades to GCC with std::chrono timezone support +// https://github.com/apache/arrow/issues/48743 Status Initialize(const GlobalOptions& options) noexcept { if (options.timezone_db_path.has_value()) { #if !USE_OS_TZDB diff --git a/cpp/src/arrow/config.h b/cpp/src/arrow/config.h index c3d027944be..9fb1710cc23 100644 --- a/cpp/src/arrow/config.h +++ b/cpp/src/arrow/config.h @@ -79,12 +79,16 @@ const BuildInfo& GetBuildInfo(); ARROW_EXPORT RuntimeInfo GetRuntimeInfo(); +// TODO(GH-48743): Remove when RTools upgrades to GCC with std::chrono timezone support +// https://github.com/apache/arrow/issues/48743 struct GlobalOptions { /// Path to text timezone database. This is only used on Windows MinGW /// builds where std::chrono timezone support is not available. 
std::optional timezone_db_path; }; +// TODO(GH-48743): Remove when RTools upgrades to GCC with std::chrono timezone support +// https://github.com/apache/arrow/issues/48743 ARROW_EXPORT Status Initialize(const GlobalOptions& options) noexcept; diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 713ea22b6fb..3e4f92eb5f5 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2381,9 +2381,18 @@ def test_strftime(): for fmt in formats: options = pc.StrftimeOptions(fmt) result = pc.strftime(tsa, options=options) - # cast to the same type as result to ignore string vs large_string expected = pa.array(ts.strftime(fmt)).cast(result.type) - assert result.equals(expected) + if sys.platform == "win32" and fmt == "%Z": + # TODO(GH-48743): On Windows, std::chrono returns GMT + # offset style (e.g. "GMT+1") instead of timezone + # abbreviations (e.g. "CET") + # https://github.com/apache/arrow/issues/48743 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 + for val in result: + assert val.as_py() is None or val.as_py().startswith("GMT") \ + or val.as_py() == "UTC" + else: + assert result.equals(expected) fmt = "%Y-%m-%dT%H:%M:%S" @@ -2397,7 +2406,15 @@ def test_strftime(): tsa = pa.array(ts, type=pa.timestamp("s", timezone)) result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) expected = pa.array(ts.strftime(fmt + "%Z")).cast(result.type) - assert result.equals(expected) + if sys.platform == "win32": + # TODO(GH-48743): On Windows, std::chrono returns GMT offset style + # https://github.com/apache/arrow/issues/48743 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 + for val in result: + assert val.as_py() is None or "GMT" in val.as_py() \ + or "UTC" in val.as_py() + else: + assert result.equals(expected) # Pandas %S is equivalent to %S in arrow for unit="s" tsa = pa.array(ts, type=pa.timestamp("s", timezone)) @@ -2614,7 +2631,9 @@ def test_assume_timezone(): 
pc.assume_timezone(ta_zoned, options=options) invalid_options = pc.AssumeTimezoneOptions("Europe/Brusselsss") - with pytest.raises(ValueError, match="not found in timezone database"): + with pytest.raises(ValueError, + match="not found in timezone database|" + "unable to locate time_zone"): pc.assume_timezone(ta, options=invalid_options) timezone = "Europe/Brussels" @@ -2769,6 +2788,11 @@ def _check_temporal_rounding(ts, values, unit): np.testing.assert_array_equal(result, expected) +# TODO(GH-48743): Re-enable when GCC bug is fixed +# https://github.com/apache/arrow/issues/48743 +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 +@pytest.mark.skipif(sys.platform == 'win32', + reason="Test triggers GCC timezone bug on Windows") @pytest.mark.timezone_data @pytest.mark.parametrize('unit', ("nanosecond", "microsecond", "millisecond", "second", "minute", "hour", "day")) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 1c9d2804f30..37962035798 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -153,6 +153,8 @@ s3_finalizer <- new.env(parent = emptyenv()) # See https://issues.apache.org/jira/browse/ARROW-8379 options(arrow.use_threads = FALSE) + # TODO(GH-48743): Remove when RTools upgrades to GCC with std::chrono timezone support + # https://github.com/apache/arrow/issues/48743 # Try to set timezone database for MinGW builds configure_tzdb() } @@ -171,6 +173,8 @@ s3_finalizer <- new.env(parent = emptyenv()) invisible() } +# TODO(GH-48743): Remove when RTools upgrades to GCC with std::chrono timezone support +# https://github.com/apache/arrow/issues/48743 configure_tzdb <- function() { # This is needed on Windows MinGW builds where std::chrono timezone support # is not available (older GCC versions). 
The tzdb R package provides the diff --git a/r/src/config.cpp b/r/src/config.cpp index a45df73a64a..3cef8319a0e 100644 --- a/r/src/config.cpp +++ b/r/src/config.cpp @@ -34,6 +34,8 @@ std::vector runtime_info() { return {info.simd_level, info.detected_simd_level}; } +// TODO(GH-48743): Remove when RTools upgrades to GCC with std::chrono timezone support +// https://github.com/apache/arrow/issues/48743 // [[arrow::export]] void set_timezone_database(cpp11::strings path) { auto paths = cpp11::as_cpp>(path); From 9ceb057b670dfc9d1deac009a999500649c8b17d Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 Jan 2026 20:31:58 +0100 Subject: [PATCH 13/29] add mingw tzdata --- .github/workflows/cpp.yml | 4 ++++ ci/scripts/msys2_setup.sh | 1 + cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 5 +++++ 3 files changed, 10 insertions(+) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 36476395593..6301c374b34 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -388,5 +388,9 @@ jobs: ci/scripts/install_gcs_testbench.sh default - name: Test shell: msys2 {0} + env: + # TODO(GH-48743): TZDIR is needed for libstdc++ std::chrono timezone support + # https://github.com/apache/arrow/issues/48743 + TZDIR: /usr/share/zoneinfo run: | ci/scripts/cpp_test.sh "$(pwd)" "$(pwd)/build" diff --git a/ci/scripts/msys2_setup.sh b/ci/scripts/msys2_setup.sh index b4634070a87..cc234aa165c 100755 --- a/ci/scripts/msys2_setup.sh +++ b/ci/scripts/msys2_setup.sh @@ -48,6 +48,7 @@ case "${target}" in packages+=("${MINGW_PACKAGE_PREFIX}-snappy") packages+=("${MINGW_PACKAGE_PREFIX}-sqlite3") packages+=("${MINGW_PACKAGE_PREFIX}-thrift") + packages+=("${MINGW_PACKAGE_PREFIX}-tzdata") packages+=("${MINGW_PACKAGE_PREFIX}-xsimd") packages+=("${MINGW_PACKAGE_PREFIX}-uriparser") packages+=("${MINGW_PACKAGE_PREFIX}-zstd") diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index e6f9cd357bf..5bbfc7268af 
100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2396,6 +2396,11 @@ TEST(Cast, TimestampToDate) { } TEST_F(CastTimezone, ZonedTimestampToDate) { + // TODO(GH-48743): Re-enable when GCC bug is fixed or tzdata is available + // https://github.com/apache/arrow/issues/48743 +#if defined(_WIN32) && !defined(_MSC_VER) + GTEST_SKIP() << "Timezone database not available on Windows MinGW (GH-48743)."; +#endif { // See TestZoned in scalar_temporal_test.cc auto timestamps = From 02f4514ff4256dafa00dcb08b7a2ca75e99d3a10 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 Jan 2026 21:55:38 +0100 Subject: [PATCH 14/29] enable std::chrono for mingw --- .../arrow/compute/kernels/scalar_cast_test.cc | 5 ---- .../compute/kernels/scalar_temporal_test.cc | 30 ------------------- cpp/src/arrow/util/chrono_internal.h | 11 +++++-- 3 files changed, 8 insertions(+), 38 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 5bbfc7268af..e6f9cd357bf 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2396,11 +2396,6 @@ TEST(Cast, TimestampToDate) { } TEST_F(CastTimezone, ZonedTimestampToDate) { - // TODO(GH-48743): Re-enable when GCC bug is fixed or tzdata is available - // https://github.com/apache/arrow/issues/48743 -#if defined(_WIN32) && !defined(_MSC_VER) - GTEST_SKIP() << "Timezone database not available on Windows MinGW (GH-48743)."; -#endif { // See TestZoned in scalar_temporal_test.cc auto timestamps = diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 0161610d92a..c2257c80e65 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -709,12 +709,6 @@ TEST_F(ScalarTemporalTest, TestIsLeapYear) { } 
TEST_F(ScalarTemporalTest, TestZoned1) { - // TODO(GH-48743): Re-enable when GCC bug is fixed - // https://github.com/apache/arrow/issues/48743 - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 -#if defined(_WIN32) && !defined(_MSC_VER) - GTEST_SKIP() << "Test triggers GCC bug TODO(GH-48743)."; -#endif std::vector timezones = {"Pacific/Marquesas", "-09:30"}; for (const auto& timezone : timezones) { auto unit = timestamp(TimeUnit::NANO, timezone); @@ -813,12 +807,6 @@ TEST_F(ScalarTemporalTest, TestZoned1) { } TEST_F(ScalarTemporalTest, TestZoned2) { - // TODO(GH-48743): Re-enable when GCC bug is fixed - // https://github.com/apache/arrow/issues/48743 - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 -#if defined(_WIN32) && !defined(_MSC_VER) - GTEST_SKIP() << "Test triggers GCC bug TODO(GH-48743)."; -#endif for (auto u : TimeUnit::values()) { auto unit = timestamp(u, "Australia/Broken_Hill"); auto month = "[1, 3, 1, 5, 1, 12, 12, 12, 1, 1, 1, 1, 12, 12, 12, 1, null]"; @@ -2780,12 +2768,6 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, CeilUTC) { } TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, CeilZoned) { - // TODO(GH-48743): Re-enable when GCC bug is fixed - // https://github.com/apache/arrow/issues/48743 - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 -#if defined(_WIN32) && !defined(_MSC_VER) - GTEST_SKIP() << "Test triggers GCC bug TODO(GH-48743)."; -#endif std::string op = "ceil_temporal"; // Data for tests below was generated via lubridate with the exception @@ -3176,12 +3158,6 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, FloorUTC) { } TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, FloorZoned) { - // TODO(GH-48743): Re-enable when GCC bug is fixed - // https://github.com/apache/arrow/issues/48743 - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 -#if defined(_WIN32) && !defined(_MSC_VER) - GTEST_SKIP() << "Test triggers GCC bug TODO(GH-48743)."; -#endif std::string op = "floor_temporal"; // Data for tests 
below was generated via lubridate with the exception @@ -3615,12 +3591,6 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, RoundUTC) { } TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, RoundZoned) { - // TODO(GH-48743): Re-enable when GCC bug is fixed - // https://github.com/apache/arrow/issues/48743 - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 -#if defined(_WIN32) && !defined(_MSC_VER) - GTEST_SKIP() << "Test triggers GCC bug TODO(GH-48743)."; -#endif std::string op = "round_temporal"; // Data for tests below was generated via lubridate with the exception diff --git a/cpp/src/arrow/util/chrono_internal.h b/cpp/src/arrow/util/chrono_internal.h index 5a86af459c8..26bd99f7a1f 100644 --- a/cpp/src/arrow/util/chrono_internal.h +++ b/cpp/src/arrow/util/chrono_internal.h @@ -39,15 +39,20 @@ // eliminating the need for users to install IANA tzdata separately. // // On Windows with MinGW/GCC: libstdc++ reads tzdata files via TZDIR env var. -// The tzdata files must be provided (e.g., via the tzdb R package). +// Set TZDIR=/usr/share/zoneinfo to use the system tzdata. // // On non-Windows: GCC libstdc++ has a bug where DST state is incorrectly reset when // a timezone transitions between rule sets (e.g., Australia/Broken_Hill around // 2000-02-29). Until this is fixed, we use the vendored date.h library. 
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 -#if defined(_WIN32) && defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907L -# define ARROW_USE_STD_CHRONO 1 +#if defined(_WIN32) +// On Windows, use std::chrono if available (MSVC or MinGW with C++20 support) +# if defined(_MSC_VER) || (defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907L) +# define ARROW_USE_STD_CHRONO 1 +# else +# define ARROW_USE_STD_CHRONO 0 +# endif #else # define ARROW_USE_STD_CHRONO 0 #endif From 9ded81f5f7405693b3d601d30b48ce2aa483973c Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 Jan 2026 22:23:10 +0100 Subject: [PATCH 15/29] reenable downloading of tzdb for clang64 with mingw on windows --- .github/workflows/cpp.yml | 9 +++++++ .../compute/kernels/scalar_temporal_test.cc | 25 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 6301c374b34..d7a44c90dd2 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -386,6 +386,15 @@ jobs: PIPX_BASE_PYTHON: ${{ steps.python-install.outputs.python-path }} run: | ci/scripts/install_gcs_testbench.sh default + - name: Download tzdata for vendored date library (Clang64) + if: matrix.msystem_upper == 'CLANG64' + shell: msys2 {0} + run: | + # TODO(GH-48743): Clang64 uses vendored date library which needs tzdata + # https://github.com/apache/arrow/issues/48743 + mkdir -p /c/Users/runneradmin/Downloads/tzdata + curl -sL https://data.iana.org/time-zones/releases/tzdata2024b.tar.gz | \ + tar -xz -C /c/Users/runneradmin/Downloads/tzdata - name: Test shell: msys2 {0} env: diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index c2257c80e65..49ea35621e7 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -709,6 +709,11 @@ TEST_F(ScalarTemporalTest, TestIsLeapYear) { } TEST_F(ScalarTemporalTest, 
TestZoned1) { + // TODO(GH-48743): GCC libstdc++ has a bug with DST transitions + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 +#if defined(_WIN32) && defined(__GNUC__) && !defined(__clang__) + GTEST_SKIP() << "Test triggers GCC libstdc++ bug (GH-48743)."; +#endif std::vector timezones = {"Pacific/Marquesas", "-09:30"}; for (const auto& timezone : timezones) { auto unit = timestamp(TimeUnit::NANO, timezone); @@ -807,6 +812,11 @@ TEST_F(ScalarTemporalTest, TestZoned1) { } TEST_F(ScalarTemporalTest, TestZoned2) { + // TODO(GH-48743): GCC libstdc++ has a bug with DST transitions + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 +#if defined(_WIN32) && defined(__GNUC__) && !defined(__clang__) + GTEST_SKIP() << "Test triggers GCC libstdc++ bug (GH-48743)."; +#endif for (auto u : TimeUnit::values()) { auto unit = timestamp(u, "Australia/Broken_Hill"); auto month = "[1, 3, 1, 5, 1, 12, 12, 12, 1, 1, 1, 1, 12, 12, 12, 1, null]"; @@ -2768,6 +2778,11 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, CeilUTC) { } TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, CeilZoned) { + // TODO(GH-48743): GCC libstdc++ has a bug with DST transitions + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 +#if defined(_WIN32) && defined(__GNUC__) && !defined(__clang__) + GTEST_SKIP() << "Test triggers GCC libstdc++ bug (GH-48743)."; +#endif std::string op = "ceil_temporal"; // Data for tests below was generated via lubridate with the exception @@ -3158,6 +3173,11 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, FloorUTC) { } TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, FloorZoned) { + // TODO(GH-48743): GCC libstdc++ has a bug with DST transitions + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 +#if defined(_WIN32) && defined(__GNUC__) && !defined(__clang__) + GTEST_SKIP() << "Test triggers GCC libstdc++ bug (GH-48743)."; +#endif std::string op = "floor_temporal"; // Data for tests below was generated via lubridate with the exception @@ -3591,6 
+3611,11 @@ TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, RoundUTC) { } TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, RoundZoned) { + // TODO(GH-48743): GCC libstdc++ has a bug with DST transitions + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 +#if defined(_WIN32) && defined(__GNUC__) && !defined(__clang__) + GTEST_SKIP() << "Test triggers GCC libstdc++ bug (GH-48743)."; +#endif std::string op = "round_temporal"; // Data for tests below was generated via lubridate with the exception From 76b6f55afc9688f29eeb126d4e211ed4b69deedc Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 Jan 2026 22:47:16 +0100 Subject: [PATCH 16/29] download windowsZones.xml, skip CastTimezone.ZonedTimestampToTime --- .github/workflows/cpp.yml | 3 +++ cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index d7a44c90dd2..32bf7bb5b60 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -395,6 +395,9 @@ jobs: mkdir -p /c/Users/runneradmin/Downloads/tzdata curl -sL https://data.iana.org/time-zones/releases/tzdata2024b.tar.gz | \ tar -xz -C /c/Users/runneradmin/Downloads/tzdata + # Also need windowsZones.xml from Unicode CLDR for Windows timezone mapping + curl -sL -o /c/Users/runneradmin/Downloads/tzdata/windowsZones.xml \ + https://raw.githubusercontent.com/unicode-org/cldr/main/common/supplemental/windowsZones.xml - name: Test shell: msys2 {0} env: diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index e6f9cd357bf..4ff58040e05 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2587,6 +2587,11 @@ TEST(Cast, TimestampToTime) { } TEST_F(CastTimezone, ZonedTimestampToTime) { + // TODO(GH-48743): GCC libstdc++ has a bug with DST transitions + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 +#if 
defined(_WIN32) && defined(__GNUC__) && !defined(__clang__) + GTEST_SKIP() << "Test triggers GCC libstdc++ bug (GH-48743)."; +#endif CheckCast(ArrayFromJSON(timestamp(TimeUnit::NANO, "Pacific/Marquesas"), kTimestampJson), ArrayFromJSON(time64(TimeUnit::NANO), R"([ 52259123456789, 50003999999999, 56480001001001, 65000000000000, From f77fc2d5e6ae2a2e7be03694ff283874e84ed5f1 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 Jan 2026 23:13:12 +0100 Subject: [PATCH 17/29] experiment --- .github/workflows/cpp.yml | 24 ++++++++++----------- c_glib/test/test-assume-timezone-options.rb | 1 - c_glib/test/test-day-of-week-options.rb | 2 -- c_glib/test/test-strftime-options.rb | 1 - 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 32bf7bb5b60..862d249ca15 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -366,6 +366,18 @@ jobs: run: | export CMAKE_BUILD_PARALLEL_LEVEL=$NUMBER_OF_PROCESSORS ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)/build" + - name: Download Timezone Database for vendored date library (Clang64) + if: matrix.msystem_upper == 'CLANG64' + shell: msys2 {0} + run: | + # TODO(GH-48743): Clang64 uses vendored date library which needs tzdata + # https://github.com/apache/arrow/issues/48743 + mkdir -p /c/Users/runneradmin/Downloads/tzdata + curl -sL https://data.iana.org/time-zones/releases/tzdata2024b.tar.gz | \ + tar -xz -C /c/Users/runneradmin/Downloads/tzdata + # Also need windowsZones.xml from Unicode CLDR for Windows timezone mapping + curl -sL -o /c/Users/runneradmin/Downloads/tzdata/windowsZones.xml \ + https://raw.githubusercontent.com/unicode-org/cldr/main/common/supplemental/windowsZones.xml - name: Download MinIO shell: msys2 {0} run: | @@ -386,18 +398,6 @@ jobs: PIPX_BASE_PYTHON: ${{ steps.python-install.outputs.python-path }} run: | ci/scripts/install_gcs_testbench.sh default - - name: Download tzdata for vendored date library (Clang64) - if: 
matrix.msystem_upper == 'CLANG64' - shell: msys2 {0} - run: | - # TODO(GH-48743): Clang64 uses vendored date library which needs tzdata - # https://github.com/apache/arrow/issues/48743 - mkdir -p /c/Users/runneradmin/Downloads/tzdata - curl -sL https://data.iana.org/time-zones/releases/tzdata2024b.tar.gz | \ - tar -xz -C /c/Users/runneradmin/Downloads/tzdata - # Also need windowsZones.xml from Unicode CLDR for Windows timezone mapping - curl -sL -o /c/Users/runneradmin/Downloads/tzdata/windowsZones.xml \ - https://raw.githubusercontent.com/unicode-org/cldr/main/common/supplemental/windowsZones.xml - name: Test shell: msys2 {0} env: diff --git a/c_glib/test/test-assume-timezone-options.rb b/c_glib/test/test-assume-timezone-options.rb index 097efc0b04b..d60935964d7 100644 --- a/c_glib/test/test-assume-timezone-options.rb +++ b/c_glib/test/test-assume-timezone-options.rb @@ -45,7 +45,6 @@ def test_nonexistent_property end def test_assume_timezone_function - omit("std::chrono not available on Windows MinGW") if Gem.win_platform? args = [ Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), ] diff --git a/c_glib/test/test-day-of-week-options.rb b/c_glib/test/test-day-of-week-options.rb index 85ac116c04d..d1c254d3780 100644 --- a/c_glib/test/test-day-of-week-options.rb +++ b/c_glib/test/test-day-of-week-options.rb @@ -39,7 +39,6 @@ def test_week_start_property end def test_day_of_week_function_with_count_from_zero_false - omit("std::chrono not available on Windows MinGW") if Gem.win_platform? args = [ # 2017-09-09T10:33:10Z (Saturday) Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), @@ -51,7 +50,6 @@ def test_day_of_week_function_with_count_from_zero_false end def test_day_of_week_function_with_week_start - omit("std::chrono not available on Windows MinGW") if Gem.win_platform? 
args = [ # 2017-09-09T10:33:10Z (Saturday) Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), diff --git a/c_glib/test/test-strftime-options.rb b/c_glib/test/test-strftime-options.rb index 93c3e0a5dec..aafcca98e42 100644 --- a/c_glib/test/test-strftime-options.rb +++ b/c_glib/test/test-strftime-options.rb @@ -35,7 +35,6 @@ def test_locale_property end def test_strftime_function - omit("std::chrono not available on Windows MinGW") if Gem.win_platform? args = [ Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190854])), ] From 1e6e34e34e83aa3edfd718942ae5d75d460ec4fc Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 Jan 2026 23:30:32 +0100 Subject: [PATCH 18/29] work --- .github/workflows/cpp.yml | 11 +++-------- c_glib/test/test-assume-timezone-options.rb | 1 + c_glib/test/test-day-of-week-options.rb | 2 ++ c_glib/test/test-strftime-options.rb | 1 + ci/scripts/download_tz_database.sh | 3 --- r/src/arrowExports.cpp | 6 +++--- 6 files changed, 10 insertions(+), 14 deletions(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 862d249ca15..79305f2d153 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -366,18 +366,13 @@ jobs: run: | export CMAKE_BUILD_PARALLEL_LEVEL=$NUMBER_OF_PROCESSORS ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)/build" - - name: Download Timezone Database for vendored date library (Clang64) + - name: Download Timezone Database if: matrix.msystem_upper == 'CLANG64' - shell: msys2 {0} + shell: bash run: | # TODO(GH-48743): Clang64 uses vendored date library which needs tzdata # https://github.com/apache/arrow/issues/48743 - mkdir -p /c/Users/runneradmin/Downloads/tzdata - curl -sL https://data.iana.org/time-zones/releases/tzdata2024b.tar.gz | \ - tar -xz -C /c/Users/runneradmin/Downloads/tzdata - # Also need windowsZones.xml from Unicode CLDR for Windows timezone mapping - curl -sL -o /c/Users/runneradmin/Downloads/tzdata/windowsZones.xml \ - 
https://raw.githubusercontent.com/unicode-org/cldr/main/common/supplemental/windowsZones.xml + ci/scripts/download_tz_database.sh - name: Download MinIO shell: msys2 {0} run: | diff --git a/c_glib/test/test-assume-timezone-options.rb b/c_glib/test/test-assume-timezone-options.rb index d60935964d7..10bf4261d33 100644 --- a/c_glib/test/test-assume-timezone-options.rb +++ b/c_glib/test/test-assume-timezone-options.rb @@ -45,6 +45,7 @@ def test_nonexistent_property end def test_assume_timezone_function + omit("Missing tzdata on Windows") if Gem.win_platform? args = [ Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), ] diff --git a/c_glib/test/test-day-of-week-options.rb b/c_glib/test/test-day-of-week-options.rb index d1c254d3780..8f76956fb4b 100644 --- a/c_glib/test/test-day-of-week-options.rb +++ b/c_glib/test/test-day-of-week-options.rb @@ -39,6 +39,7 @@ def test_week_start_property end def test_day_of_week_function_with_count_from_zero_false + omit("Missing tzdata on Windows") if Gem.win_platform? args = [ # 2017-09-09T10:33:10Z (Saturday) Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), @@ -50,6 +51,7 @@ def test_day_of_week_function_with_count_from_zero_false end def test_day_of_week_function_with_week_start + omit("Missing tzdata on Windows") if Gem.win_platform? args = [ # 2017-09-09T10:33:10Z (Saturday) Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), diff --git a/c_glib/test/test-strftime-options.rb b/c_glib/test/test-strftime-options.rb index aafcca98e42..81440d5d086 100644 --- a/c_glib/test/test-strftime-options.rb +++ b/c_glib/test/test-strftime-options.rb @@ -35,6 +35,7 @@ def test_locale_property end def test_strftime_function + omit("Missing tzdata on Windows") if Gem.win_platform? 
args = [ Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190854])), ] diff --git a/ci/scripts/download_tz_database.sh b/ci/scripts/download_tz_database.sh index 4fc9d857ea0..b74d251a43b 100755 --- a/ci/scripts/download_tz_database.sh +++ b/ci/scripts/download_tz_database.sh @@ -17,9 +17,6 @@ # specific language governing permissions and limitations # under the License. -# Downloads IANA timezone database for use with the vendored date library -# on Windows when not using MSVC (e.g., MinGW builds). - set -ex # Download database diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 0fa62edc86c..73bf81f83bb 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -5843,9 +5843,9 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_compute__GetFunctionNames", (DL_FUNC) &_arrow_compute__GetFunctionNames, 0}, { "_arrow_compute__Initialize", (DL_FUNC) &_arrow_compute__Initialize, 0}, { "_arrow_RegisterScalarUDF", (DL_FUNC) &_arrow_RegisterScalarUDF, 2}, - { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, - { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, - { "_arrow_set_timezone_database", (DL_FUNC) &_arrow_set_timezone_database, 1}, + { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, + { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, + { "_arrow_set_timezone_database", (DL_FUNC) &_arrow_set_timezone_database, 1}, { "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1}, { "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, { "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, From c1e7dfc87077ea77c3f504104a337625e22f293b Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 Jan 2026 23:32:23 +0100 Subject: [PATCH 19/29] experiment --- ci/scripts/msys2_setup.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/scripts/msys2_setup.sh b/ci/scripts/msys2_setup.sh index 
cc234aa165c..b4634070a87 100755 --- a/ci/scripts/msys2_setup.sh +++ b/ci/scripts/msys2_setup.sh @@ -48,7 +48,6 @@ case "${target}" in packages+=("${MINGW_PACKAGE_PREFIX}-snappy") packages+=("${MINGW_PACKAGE_PREFIX}-sqlite3") packages+=("${MINGW_PACKAGE_PREFIX}-thrift") - packages+=("${MINGW_PACKAGE_PREFIX}-tzdata") packages+=("${MINGW_PACKAGE_PREFIX}-xsimd") packages+=("${MINGW_PACKAGE_PREFIX}-uriparser") packages+=("${MINGW_PACKAGE_PREFIX}-zstd") From 881558032fdfb77f864cd36f5c72f591078e1cdf Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 6 Jan 2026 23:41:36 +0100 Subject: [PATCH 20/29] another experiment --- .github/workflows/cpp.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 79305f2d153..fcae002e523 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -395,9 +395,5 @@ jobs: ci/scripts/install_gcs_testbench.sh default - name: Test shell: msys2 {0} - env: - # TODO(GH-48743): TZDIR is needed for libstdc++ std::chrono timezone support - # https://github.com/apache/arrow/issues/48743 - TZDIR: /usr/share/zoneinfo run: | ci/scripts/cpp_test.sh "$(pwd)" "$(pwd)/build" From 43808e8af70178c7bb97e28cefc3996080fbdd44 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 7 Jan 2026 15:53:16 +0100 Subject: [PATCH 21/29] experiment --- cpp/src/arrow/util/chrono_internal.h | 14 +++++++------- python/pyarrow/__init__.py | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/util/chrono_internal.h b/cpp/src/arrow/util/chrono_internal.h index 26bd99f7a1f..fe6823d3b6a 100644 --- a/cpp/src/arrow/util/chrono_internal.h +++ b/cpp/src/arrow/util/chrono_internal.h @@ -46,14 +46,14 @@ // 2000-02-29). Until this is fixed, we use the vendored date.h library. 
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 -#if defined(_WIN32) -// On Windows, use std::chrono if available (MSVC or MinGW with C++20 support) -# if defined(_MSC_VER) || (defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907L) -# define ARROW_USE_STD_CHRONO 1 -# else -# define ARROW_USE_STD_CHRONO 0 -# endif +#if defined(_WIN32) && defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907L +// Use std::chrono on Windows when timezone support is available (MSVC or libstdc++) +// MSVC uses Windows' internal timezone database, libstdc++ uses TZDIR environment +// variable +# define ARROW_USE_STD_CHRONO 1 #else +// Use vendored date library (non-Windows, or libc++/older libraries without timezone +// support) # define ARROW_USE_STD_CHRONO 0 #endif diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 18a40d877c3..d03181264c4 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -56,7 +56,7 @@ def parse_git(root, **kwargs): except ImportError: __version__ = None -from pyarrow.lib import (BuildInfo, CppBuildInfo, RuntimeInfo, set_timezone_db_path, +from pyarrow.lib import (BuildInfo, CppBuildInfo, RuntimeInfo, MonthDayNano, VersionInfo, build_info, cpp_build_info, cpp_version, cpp_version_info, runtime_info, cpu_count, set_cpu_count, enable_signal_handlers, From 08aca8306cb41274ecaa9389713ed4a8084db68a Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 7 Jan 2026 18:09:50 +0100 Subject: [PATCH 22/29] Reverting some changes --- cpp/src/arrow/config.cc | 17 +++++++++- cpp/src/arrow/config.h | 17 ++++++++-- cpp/src/arrow/public_api_test.cc | 44 ++++++++++++++++++++++++++ cpp/src/arrow/testing/util.cc | 23 ++++++++++++++ cpp/src/arrow/testing/util.h | 11 +++++++ docs/source/cpp/build_system.rst | 26 +++++++++++++++ docs/source/developers/cpp/windows.rst | 12 +++++++ docs/source/python/install.rst | 36 +++++++++++++++++++++ python/pyarrow/__init__.py | 2 +- python/pyarrow/config.pxi | 20 ++++++++++++ 
python/pyarrow/includes/libarrow.pxd | 7 ++++ python/pyarrow/tests/strategies.py | 2 +- python/pyarrow/tests/test_compute.py | 7 ++-- python/pyarrow/tests/test_misc.py | 14 ++++++++ python/pyarrow/tests/test_util.py | 24 +++++++++++++- python/pyarrow/util.py | 34 ++++++++++++++++++++ 16 files changed, 287 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index b2f7a385e38..8ff5a3df89b 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -64,6 +64,10 @@ std::string MakeSimdLevelString(QueryFlagFunction&& query_flag) { } } +// TODO(GH-48593): Remove when libc++ supports std::chrono timezone +// https://github.com/apache/arrow/issues/48593 +std::optional timezone_db_path; + }; // namespace const BuildInfo& GetBuildInfo() { return kBuildInfo; } @@ -75,6 +79,12 @@ RuntimeInfo GetRuntimeInfo() { MakeSimdLevelString([&](int64_t flags) { return cpu_info->IsSupported(flags); }); info.detected_simd_level = MakeSimdLevelString([&](int64_t flags) { return cpu_info->IsDetected(flags); }); + info.using_os_timezone_db = USE_OS_TZDB; +#if !USE_OS_TZDB + info.timezone_db_path = timezone_db_path; +#else + info.timezone_db_path = std::optional(); +#endif return info; } @@ -89,7 +99,12 @@ Status Initialize(const GlobalOptions& options) noexcept { } catch (const std::runtime_error& e) { return Status::IOError(e.what()); } -#endif + timezone_db_path = options.timezone_db_path.value(); +#else + return Status::Invalid( + "Arrow was set to use OS timezone database at compile time, " + "so a downloaded database cannot be provided at runtime."); +#endif // !USE_OS_TZDB } return Status::OK(); } diff --git a/cpp/src/arrow/config.h b/cpp/src/arrow/config.h index 9fb1710cc23..54f42355bd2 100644 --- a/cpp/src/arrow/config.h +++ b/cpp/src/arrow/config.h @@ -64,6 +64,17 @@ struct RuntimeInfo { /// The SIMD level available on the OS and CPU std::string detected_simd_level; + + /// Whether using the OS-based timezone database + /// This is 
set at compile-time. + // ARROW_DEPRECATED("Deprecated in 23.0.0") + bool using_os_timezone_db; + + /// The path to the timezone database; by default None. + /// This is only used on some Windows builds where + /// std::chrono timezone support is not available. + // ARROW_DEPRECATED("Deprecated in 23.0.0") + std::optional timezone_db_path; }; /// \brief Get runtime build info. @@ -82,8 +93,10 @@ RuntimeInfo GetRuntimeInfo(); // TODO(GH-48743): Remove when RTools upgrades to GCC with std::chrono timezone support // https://github.com/apache/arrow/issues/48743 struct GlobalOptions { - /// Path to text timezone database. This is only used on Windows MinGW - /// builds where std::chrono timezone support is not available. + /// The path to the timezone database; by default None. + /// This is only used on some Windows builds where + /// std::chrono timezone support is not available. + // ARROW_DEPRECATED("Deprecated in 23.0.0") std::optional timezone_db_path; }; diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index 0b6608913a6..25ee485b5f6 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -122,4 +122,48 @@ TEST(Misc, BuildInfo) { ASSERT_THAT(info.full_so_version, ::testing::HasSubstr(info.so_version)); } +// TODO(GH-48593): Remove when libc++ supports std::chrono timezone +// https://github.com/apache/arrow/issues/48593 +TEST(Misc, SetTimezoneConfig) { +#ifndef _WIN32 + GTEST_SKIP() << "Can only set the Timezone database on Windows"; +#elif !defined(ARROW_FILESYSTEM) + GTEST_SKIP() << "Need filesystem support to test timezone config."; +#else + auto fs = std::make_shared(); + + std::optional tzdata_result = GetTestTimezoneDatabaseRoot(); + std::string tzdata_dir; + if (tzdata_result.has_value()) { + tzdata_dir = tzdata_result.value(); + } else { + auto home_raw = std::getenv("USERPROFILE"); + std::string home = home_raw == nullptr ? 
"~" : std::string(home_raw); + ASSERT_OK_AND_ASSIGN(tzdata_dir, fs->NormalizePath(home + "\\Downloads\\tzdata")); + } + ASSERT_OK_AND_ASSIGN(tzdata_dir, fs->NormalizePath(tzdata_dir)); + ASSERT_OK_AND_ASSIGN(auto tzdata_path, + arrow::internal::PlatformFilename::FromString(tzdata_dir)); + + if (!arrow::internal::FileExists(tzdata_path).ValueOr(false)) { + GTEST_SKIP() << "Couldn't find timezone database in expected dir: " << tzdata_dir; + } + // Create a tmp directory + ASSERT_OK_AND_ASSIGN(auto tempdir, arrow::internal::TemporaryDir::Make("tzdata")); + + // Validate that setting tzdb to that dir fails + arrow::GlobalOptions options = {std::make_optional(tempdir->path().ToString())}; + ASSERT_NOT_OK(arrow::Initialize(options)); + + // Copy tzdb data from ~/Downloads + auto selector = arrow::fs::FileSelector(); + selector.base_dir = tzdata_dir; + selector.recursive = true; + ASSERT_OK(arrow::fs::CopyFiles(fs, selector, fs, tempdir->path().ToString())); + + // Validate that tzdb is working + ASSERT_OK(arrow::Initialize(options)); +#endif +} + } // namespace arrow diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index 8846347e1c1..204d61fa03e 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -122,6 +122,29 @@ Status GetTestResourceRoot(std::string* out) { return Status::OK(); } +// TODO(GH-48593): Remove when libc++ supports std::chrono timezone +// https://github.com/apache/arrow/issues/48593 +std::optional GetTestTimezoneDatabaseRoot() { + const char* c_root = std::getenv("ARROW_TIMEZONE_DATABASE"); + if (!c_root) { + return std::optional(); + } + return std::make_optional(std::string(c_root)); +} + +// TODO(GH-48593): Remove when libc++ supports std::chrono timezone +// https://github.com/apache/arrow/issues/48593 +Status InitTestTimezoneDatabase() { + auto maybe_tzdata = GetTestTimezoneDatabaseRoot(); + // If missing, timezone database will default to %USERPROFILE%\Downloads\tzdata + if 
(!maybe_tzdata.has_value()) return Status::OK(); + + auto tzdata_path = std::string(maybe_tzdata.value()); + arrow::GlobalOptions options = {std::make_optional(tzdata_path)}; + ARROW_RETURN_NOT_OK(arrow::Initialize(options)); + return Status::OK(); +} + int GetListenPort() { // Get a new available port number by binding a socket to an ephemeral port // and then closing it. Since ephemeral port allocation tends to avoid diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h index 98b1bdb134e..3304889b0b4 100644 --- a/cpp/src/arrow/testing/util.h +++ b/cpp/src/arrow/testing/util.h @@ -112,6 +112,17 @@ UnionTypeFactories() { // Status ARROW_TESTING_EXPORT Status GetTestResourceRoot(std::string*); +// TODO(GH-48593): Remove when libc++ supports std::chrono timezone +// https://github.com/apache/arrow/issues/48593 +// Return the value of the ARROW_TIMEZONE_DATABASE environment variable +ARROW_TESTING_EXPORT std::optional GetTestTimezoneDatabaseRoot(); + +// TODO(GH-48593): Remove when libc++ supports std::chrono timezone +// https://github.com/apache/arrow/issues/48593 +// Set the Timezone database based on the ARROW_TIMEZONE_DATABASE env variable +// This is only relevant on Windows, since other OSs have compatible databases built-in +ARROW_TESTING_EXPORT Status InitTestTimezoneDatabase(); + // Get a TCP port number to listen on. This is a different number every time, // as reusing the same port across tests can produce spurious bind errors on // Windows. diff --git a/docs/source/cpp/build_system.rst b/docs/source/cpp/build_system.rst index b124060053b..d3c47ce0412 100644 --- a/docs/source/cpp/build_system.rst +++ b/docs/source/cpp/build_system.rst @@ -228,3 +228,29 @@ can control the source of each dependency and whether it is statically or dynamically linked. See :doc:`/developers/cpp/building` for instructions. 
Or alternatively, use Arrow from a package manager such as Conda or vcpkg which will manage consistent versions of Arrow and its dependencies. + + +.. _download-timezone-database: + +Runtime Dependencies +==================== + +.. TODO(GH-48593): Remove when libc++ supports std::chrono timezone + https://github.com/apache/arrow/issues/48593 + +While Arrow uses the OS-provided timezone database on Linux and macOS, it +requires a user-provided database on Windows. You must download and extract the +text version of the IANA timezone database and add the Windows timezone mapping +XML. To download, you can use the following batch script: + +.. literalinclude:: ../../../ci/appveyor-cpp-setup.bat + :language: batch + :start-after: @rem (Doc section: Download timezone database) + :end-before: @rem (Doc section: Download timezone database) + +By default, the timezone database will be detected at ``%USERPROFILE%\Downloads\tzdata``, +but you can set a custom path at runtime in :struct:`arrow::GlobalOptions`:: + + arrow::GlobalOptions options; + options.timezone_db_path = "path/to/tzdata"; + ARROW_RETURN_NOT_OK(arrow::Initialize(options)); diff --git a/docs/source/developers/cpp/windows.rst b/docs/source/developers/cpp/windows.rst index b4d8f19dc26..a28ff0722fb 100644 --- a/docs/source/developers/cpp/windows.rst +++ b/docs/source/developers/cpp/windows.rst @@ -381,6 +381,18 @@ be defined, and similarly for ``-DARROW_FLIGHT_SQL=ON``. ARROW_FLIGHT_STATIC ARROW_FLIGHT_SQL_STATIC) +Downloading the Timezone Database +================================= + +.. TODO(GH-48593): Remove when libc++ supports std::chrono timezone + https://github.com/apache/arrow/issues/48593 + +To run some of the compute unit tests on Windows, the IANA timezone database +and the Windows timezone mapping need to be downloaded first. See +:ref:`download-timezone-database` for download instructions.
To set a non-default +path for the timezone database while running the unit tests, set the +``ARROW_TIMEZONE_DATABASE`` environment variable. + Replicating Appveyor Builds =========================== diff --git a/docs/source/python/install.rst b/docs/source/python/install.rst index b948905df78..616430836a1 100644 --- a/docs/source/python/install.rst +++ b/docs/source/python/install.rst @@ -80,6 +80,42 @@ Optional dependencies Additional packages PyArrow is compatible with are :ref:`fsspec ` and **pytz**, **dateutil** or **tzdata** package for timezones. +tzdata on Windows +^^^^^^^^^^^^^^^^^ + +.. TODO(GH-48593): Remove when libc++ supports std::chrono timezone + https://github.com/apache/arrow/issues/48593 + +While Arrow uses the OS-provided timezone database on Linux and macOS, it requires a +user-provided database on Windows. To download and extract the text version of +the IANA timezone database follow the instructions in the C++ +:ref:`download-timezone-database` or use pyarrow utility function +``pyarrow.util.download_tzdata_on_windows()`` that does the same. + +By default, the timezone database will be detected at ``%USERPROFILE%\Downloads\tzdata``. +If the database has been downloaded in a different location, you will need to set +a custom path to the database from Python: + +.. code-block:: python + + >>> import pyarrow as pa + >>> pa.set_timezone_db_path("custom_path") + +You may encounter problems writing datetime data to an ORC file if you install +pyarrow with pip. One possible solution to fix this problem: + + 1. Install tzdata with ``pip install tzdata`` + 2. Set the environment variable ``TZDIR = path\to\.venv\Lib\site-packages\tzdata\`` + +You can find where ``tzdata`` is installed with the following python +command: + +.. code-block:: python + + >>> import tzdata + >>> print(tzdata.__file__) + path\to\.venv\Lib\site-packages\tzdata\__init__.py + .. 
_python-conda-differences: diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index d03181264c4..18a40d877c3 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -56,7 +56,7 @@ def parse_git(root, **kwargs): except ImportError: __version__ = None -from pyarrow.lib import (BuildInfo, CppBuildInfo, RuntimeInfo, +from pyarrow.lib import (BuildInfo, CppBuildInfo, RuntimeInfo, set_timezone_db_path, MonthDayNano, VersionInfo, build_info, cpp_build_info, cpp_version, cpp_version_info, runtime_info, cpu_count, set_cpu_count, enable_signal_handlers, diff --git a/python/pyarrow/config.pxi b/python/pyarrow/config.pxi index 4fdaaf0bdb9..c8ecf202567 100644 --- a/python/pyarrow/config.pxi +++ b/python/pyarrow/config.pxi @@ -96,3 +96,23 @@ build_info = _build_info() cpp_build_info = build_info.cpp_build_info cpp_version = build_info.cpp_build_info.version cpp_version_info = build_info.cpp_build_info.version_info + + +# TODO(GH-48593): Remove when libc++ supports std::chrono timezone +# https://github.com/apache/arrow/issues/48593 +def set_timezone_db_path(path): + """ + Configure the path to text timezone database on Windows. + + Parameters + ---------- + path : str + Path to text timezone database. 
+ """ + cdef: + CGlobalOptions options + + if path is not None: + options.timezone_db_path = tobytes(path) + + check_status(Initialize(options)) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 897ead17397..617a4b76721 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -90,6 +90,13 @@ cdef extern from "arrow/config.h" namespace "arrow" nogil: CRuntimeInfo GetRuntimeInfo() + # TODO(GH-48593): Remove when libc++ supports std::chrono timezone + # https://github.com/apache/arrow/issues/48593 + cdef cppclass CGlobalOptions" arrow::GlobalOptions": + optional[c_string] timezone_db_path + + CStatus Initialize(const CGlobalOptions& options) + cdef extern from "arrow/util/future.h" namespace "arrow" nogil: cdef cppclass CFuture_Void" arrow::Future<>": diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index f18d58b097e..3c31650ddf9 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -327,7 +327,7 @@ def arrays(draw, type, size=None, nullable=True): value = st.dates() elif pa.types.is_timestamp(ty): if zoneinfo is None: - pytest.skip('no module named zoneinfo') + pytest.skip('no module named zoneinfo (or tzdata on Windows)') if ty.tz is None: pytest.skip('requires timezone not None') min_int64 = -(2**63) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 3e4f92eb5f5..425d612ef43 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -203,14 +203,14 @@ def test_option_class_equality(request): first_week_is_fully_in_year=False), pc.ZeroFillOptions(4, "0"), ] - # Timezone database might not be installed on Emscripten + # Timezone database might not be installed on Windows or Emscripten if request.config.pyarrow.is_enabled["timezone_data"]: options.append(pc.AssumeTimezoneOptions("Europe/Ljubljana")) classes = {type(option) for option 
in options} for cls in exported_option_classes: - # Timezone database might not be installed on Emscripten + # Timezone database might not be installed on Windows or Emscripten if ( cls not in classes and (request.config.pyarrow.is_enabled["timezone_data"]) @@ -2381,6 +2381,7 @@ def test_strftime(): for fmt in formats: options = pc.StrftimeOptions(fmt) result = pc.strftime(tsa, options=options) + # cast to the same type as result to ignore string vs large_string expected = pa.array(ts.strftime(fmt)).cast(result.type) if sys.platform == "win32" and fmt == "%Z": # TODO(GH-48743): On Windows, std::chrono returns GMT @@ -2564,7 +2565,7 @@ def test_extract_datetime_components(request): # Test timezone aware timestamp array if not request.config.pyarrow.is_enabled["timezone_data"]: - pytest.skip('Timezone database is not available') + pytest.skip('Timezone database is not installed on Windows') else: for timezone in timezones: _check_datetime_components(timestamps, timezone) diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index fb73d654ae3..af017234703 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -22,6 +22,7 @@ import pytest import pyarrow as pa +from pyarrow.lib import ArrowInvalid def test_get_include(): @@ -137,6 +138,19 @@ def import_arrow(): subprocess.check_call([sys.executable, "-c", code]) +# TODO(GH-48593): Remove when libc++ supports std::chrono timezone +# https://github.com/apache/arrow/issues/48593 +@pytest.mark.skipif(sys.platform == "win32", + reason="Path to timezone database is not configurable " + "on non-Windows platforms") +def test_set_timezone_db_path_non_windows(): + # set_timezone_db_path raises an error on non-Windows platforms + with pytest.raises(ArrowInvalid, + match="Arrow was set to use OS timezone " + "database at compile time"): + pa.set_timezone_db_path("path") + + @pytest.mark.parametrize('klass', [ pa.Field, pa.Schema, diff --git 
a/python/pyarrow/tests/test_util.py b/python/pyarrow/tests/test_util.py index 9fccb76112d..31cb74050c2 100644 --- a/python/pyarrow/tests/test_util.py +++ b/python/pyarrow/tests/test_util.py @@ -16,14 +16,17 @@ # under the License. import gc +import os import signal +import shutil import sys import textwrap import weakref import pytest -from pyarrow.util import doc, _break_traceback_cycle_from_frame +from pyarrow.util import (doc, _break_traceback_cycle_from_frame, + download_tzdata_on_windows) from pyarrow.tests.util import disabled_gc @@ -207,3 +210,22 @@ def test_signal_refcycle(): assert wr() is not None _break_traceback_cycle_from_frame(sys._getframe(0)) assert wr() is None + + +# TODO(GH-48593): Remove when libc++ supports std::chrono timezone +# https://github.com/apache/arrow/issues/48593 +@pytest.mark.skipif(sys.platform != "win32", + reason="Timezone database is already provided.") +def test_download_tzdata_on_windows(): + tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata") + + # Download timezone database and remove data in case it already exists + if (os.path.exists(tzdata_path)): + shutil.rmtree(tzdata_path) + download_tzdata_on_windows() + + # Inspect the folder + assert os.path.exists(tzdata_path) + assert os.path.exists(os.path.join(tzdata_path, "windowsZones.xml")) + assert os.path.exists(os.path.join(tzdata_path, "europe")) + assert 'version' in os.listdir(tzdata_path) diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py index a9827c36585..f84d8030b50 100644 --- a/python/pyarrow/util.py +++ b/python/pyarrow/util.py @@ -242,3 +242,37 @@ def _download_requests(url, out_path): with requests.get(url) as response: with open(out_path, 'wb') as f: f.write(response.content) + + +# TODO(GH-48593): Remove when libc++ supports std::chrono timezone +# https://github.com/apache/arrow/issues/48593 +def download_tzdata_on_windows(): + r""" + Download and extract latest IANA timezone database into the + location expected by Arrow which is 
%USERPROFILE%\Downloads\tzdata. + """ + if sys.platform != 'win32': + raise TypeError(f"Timezone database is already provided by {sys.platform}") + + import tarfile + + tzdata_url = "https://data.iana.org/time-zones/tzdata-latest.tar.gz" + tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata") + tzdata_compressed_path = os.path.join(tzdata_path, "tzdata.tar.gz") + windows_zones_url = "https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml" # noqa + windows_zones_path = os.path.join(tzdata_path, "windowsZones.xml") + os.makedirs(tzdata_path, exist_ok=True) + + # Try to download the files with requests and then fall back to urllib. This + # works around possible issues in certain older environment (GH-45295) + try: + _download_requests(tzdata_url, tzdata_compressed_path) + _download_requests(windows_zones_url, windows_zones_path) + except ImportError: + _download_urllib(tzdata_url, tzdata_compressed_path) + _download_urllib(windows_zones_url, windows_zones_path) + + assert os.path.exists(tzdata_compressed_path) + assert os.path.exists(windows_zones_path) + + tarfile.open(tzdata_compressed_path).extractall(tzdata_path) From 1729854e1992d76e3908bf1ab13045404c8e66ce Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 7 Jan 2026 19:36:08 +0100 Subject: [PATCH 23/29] Reverting more changes --- .github/workflows/cpp.yml | 3 ++- cpp/src/arrow/config.cc | 6 ++---- cpp/src/arrow/config.h | 14 ++------------ cpp/src/arrow/public_api_test.cc | 2 -- cpp/src/arrow/testing/util.cc | 4 ++-- cpp/src/arrow/testing/util.h | 4 ++-- docs/source/cpp/build_system.rst | 3 --- docs/source/developers/cpp/windows.rst | 3 --- docs/source/python/install.rst | 3 --- python/pyarrow/config.pxi | 12 ++++++++++++ python/pyarrow/util.py | 11 +++++++++++ r/R/arrow-package.R | 4 ++-- r/src/config.cpp | 2 +- 13 files changed, 36 insertions(+), 35 deletions(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 
fcae002e523..f0d8d0b5767 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -370,7 +370,8 @@ jobs: if: matrix.msystem_upper == 'CLANG64' shell: bash run: | - # TODO(GH-48743): Clang64 uses vendored date library which needs tzdata + # TODO(GH-48743): msys2 clang64 uses libc++ and vendored date.h library + # which needs tzdata database to build Arrow with time zone support. # https://github.com/apache/arrow/issues/48743 ci/scripts/download_tz_database.sh - name: Download MinIO diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index 8ff5a3df89b..18b901aa94c 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -64,8 +64,6 @@ std::string MakeSimdLevelString(QueryFlagFunction&& query_flag) { } } -// TODO(GH-48593): Remove when libc++ supports std::chrono timezone -// https://github.com/apache/arrow/issues/48593 std::optional timezone_db_path; }; // namespace @@ -88,8 +86,8 @@ RuntimeInfo GetRuntimeInfo() { return info; } -// TODO(GH-48743): Remove when RTools upgrades to GCC with std::chrono timezone support -// https://github.com/apache/arrow/issues/48743 +// TODO(GH-48743): Remove when RTools upgrades to libstdc++ with std::chrono timezone +// support https://github.com/apache/arrow/issues/48743 Status Initialize(const GlobalOptions& options) noexcept { if (options.timezone_db_path.has_value()) { #if !USE_OS_TZDB diff --git a/cpp/src/arrow/config.h b/cpp/src/arrow/config.h index 54f42355bd2..617d6c268b5 100644 --- a/cpp/src/arrow/config.h +++ b/cpp/src/arrow/config.h @@ -67,13 +67,9 @@ struct RuntimeInfo { /// Whether using the OS-based timezone database /// This is set at compile-time. - // ARROW_DEPRECATED("Deprecated in 23.0.0") bool using_os_timezone_db; /// The path to the timezone database; by default None. - /// This is only used on some Windows builds where - /// std::chrono timezone support is not available. 
- // ARROW_DEPRECATED("Deprecated in 23.0.0") std::optional timezone_db_path; }; @@ -90,18 +86,12 @@ const BuildInfo& GetBuildInfo(); ARROW_EXPORT RuntimeInfo GetRuntimeInfo(); -// TODO(GH-48743): Remove when RTools upgrades to GCC with std::chrono timezone support -// https://github.com/apache/arrow/issues/48743 struct GlobalOptions { - /// The path to the timezone database; by default None. - /// This is only used on some Windows builds where - /// std::chrono timezone support is not available. - // ARROW_DEPRECATED("Deprecated in 23.0.0") + /// Path to text timezone database. This is only configurable on Windows, + /// which does not have a compatible OS timezone database. std::optional timezone_db_path; }; -// TODO(GH-48743): Remove when RTools upgrades to GCC with std::chrono timezone support -// https://github.com/apache/arrow/issues/48743 ARROW_EXPORT Status Initialize(const GlobalOptions& options) noexcept; diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index 25ee485b5f6..ccc80dc93a5 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -122,8 +122,6 @@ TEST(Misc, BuildInfo) { ASSERT_THAT(info.full_so_version, ::testing::HasSubstr(info.so_version)); } -// TODO(GH-48593): Remove when libc++ supports std::chrono timezone -// https://github.com/apache/arrow/issues/48593 TEST(Misc, SetTimezoneConfig) { #ifndef _WIN32 GTEST_SKIP() << "Can only set the Timezone database on Windows"; diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index 204d61fa03e..6c84f703f91 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -122,7 +122,7 @@ Status GetTestResourceRoot(std::string* out) { return Status::OK(); } -// TODO(GH-48593): Remove when libc++ supports std::chrono timezone +// TODO(GH-48593): Remove when we have full std::chrono support // https://github.com/apache/arrow/issues/48593 std::optional GetTestTimezoneDatabaseRoot() { const char* c_root = 
std::getenv("ARROW_TIMEZONE_DATABASE"); @@ -132,7 +132,7 @@ std::optional GetTestTimezoneDatabaseRoot() { return std::make_optional(std::string(c_root)); } -// TODO(GH-48593): Remove when libc++ supports std::chrono timezone +// TODO(GH-48593): Remove when we have full std::chrono support // https://github.com/apache/arrow/issues/48593 Status InitTestTimezoneDatabase() { auto maybe_tzdata = GetTestTimezoneDatabaseRoot(); diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h index 3304889b0b4..0bddd642dc0 100644 --- a/cpp/src/arrow/testing/util.h +++ b/cpp/src/arrow/testing/util.h @@ -112,12 +112,12 @@ UnionTypeFactories() { // Status ARROW_TESTING_EXPORT Status GetTestResourceRoot(std::string*); -// TODO(GH-48593): Remove when libc++ supports std::chrono timezone +// TODO(GH-48593): Remove when we have full std::chrono support // https://github.com/apache/arrow/issues/48593 // Return the value of the ARROW_TIMEZONE_DATABASE environment variable ARROW_TESTING_EXPORT std::optional GetTestTimezoneDatabaseRoot(); -// TODO(GH-48593): Remove when libc++ supports std::chrono timezone +// TODO(GH-48593): Remove when we have full std::chrono support // https://github.com/apache/arrow/issues/48593 // Set the Timezone database based on the ARROW_TIMEZONE_DATABASE env variable // This is only relevant on Windows, since other OSs have compatible databases built-in diff --git a/docs/source/cpp/build_system.rst b/docs/source/cpp/build_system.rst index d3c47ce0412..01dbe5e45f8 100644 --- a/docs/source/cpp/build_system.rst +++ b/docs/source/cpp/build_system.rst @@ -235,9 +235,6 @@ will manage consistent versions of Arrow and its dependencies. Runtime Dependencies ==================== -.. TODO(GH-48593): Remove when libc++ supports std::chrono timezone - https://github.com/apache/arrow/issues/48593 - While Arrow uses the OS-provided timezone database on Linux and macOS, it requires a user-provided database on Windows. 
You must download and extract the text version of the IANA timezone database and add the Windows timezone mapping diff --git a/docs/source/developers/cpp/windows.rst b/docs/source/developers/cpp/windows.rst index a28ff0722fb..21bde92d0b7 100644 --- a/docs/source/developers/cpp/windows.rst +++ b/docs/source/developers/cpp/windows.rst @@ -384,9 +384,6 @@ be defined, and similarly for ``-DARROW_FLIGHT_SQL=ON``. Downloading the Timezone Database ================================= -.. TODO(GH-48593): Remove when libc++ supports std::chrono timezone - https://github.com/apache/arrow/issues/48593 - To run some of the compute unit tests on Windows, the IANA timezone database and the Windows timezone mapping need to be downloaded first. See :ref:`download-timezone-database` for download instructions. To set a non-default diff --git a/docs/source/python/install.rst b/docs/source/python/install.rst index 616430836a1..c6f098ee20a 100644 --- a/docs/source/python/install.rst +++ b/docs/source/python/install.rst @@ -83,9 +83,6 @@ and **pytz**, **dateutil** or **tzdata** package for timezones. tzdata on Windows ^^^^^^^^^^^^^^^^^ -.. TODO(GH-48593): Remove when libc++ supports std::chrono timezone - https://github.com/apache/arrow/issues/48593 - While Arrow uses the OS-provided timezone database on Linux and macOS, it requires a user-provided database on Windows. To download and extract the text version of the IANA timezone database follow the instructions in the C++ diff --git a/python/pyarrow/config.pxi b/python/pyarrow/config.pxi index c8ecf202567..f6cd1be4077 100644 --- a/python/pyarrow/config.pxi +++ b/python/pyarrow/config.pxi @@ -108,7 +108,19 @@ def set_timezone_db_path(path): ---------- path : str Path to text timezone database. + .. deprecated:: 23.0.0 + This function is deprecated and will be removed in a future version. + PyArrow now uses the operating system's timezone database on Windows. 
""" + + warnings.warn( + "pyarrow.set_timezone_db_path is deprecated as of 23.0.0 " + "and will be removed in a future version. PyArrow now uses the " + "operating system's timezone database on Windows.", + FutureWarning, + stacklevel=2 + ) + cdef: CGlobalOptions options diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py index f84d8030b50..2c64be63fbf 100644 --- a/python/pyarrow/util.py +++ b/python/pyarrow/util.py @@ -250,7 +250,18 @@ def download_tzdata_on_windows(): r""" Download and extract latest IANA timezone database into the location expected by Arrow which is %USERPROFILE%\Downloads\tzdata. + .. deprecated:: 23.0.0 + This function is deprecated and will be removed in a future version. + PyArrow now uses the operating system's timezone database on Windows. """ + + warnings.warn( + "pyarrow.util.download_tzdata_on_windows is deprecated as of 23.0.0 " + "and will be removed in a future version. PyArrow now uses the " + "operating system's timezone database on Windows.", + FutureWarning, + stacklevel=2 + ) if sys.platform != 'win32': raise TypeError(f"Timezone database is already provided by {sys.platform}") diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 37962035798..0d1a92e12ad 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -153,7 +153,7 @@ s3_finalizer <- new.env(parent = emptyenv()) # See https://issues.apache.org/jira/browse/ARROW-8379 options(arrow.use_threads = FALSE) - # TODO(GH-48743): Remove when RTools upgrades to GCC with std::chrono timezone support + # TODO(GH-48743): Remove when RTools upgrades to libstdc++ with std::chrono timezone support # https://github.com/apache/arrow/issues/48743 # Try to set timezone database for MinGW builds configure_tzdb() @@ -173,7 +173,7 @@ s3_finalizer <- new.env(parent = emptyenv()) invisible() } -# TODO(GH-48743): Remove when RTools upgrades to GCC with std::chrono timezone support +# TODO(GH-48743): Remove when RTools upgrades to libstdc++ with std::chrono timezone support # 
https://github.com/apache/arrow/issues/48743 configure_tzdb <- function() { # This is needed on Windows MinGW builds where std::chrono timezone support diff --git a/r/src/config.cpp b/r/src/config.cpp index 3cef8319a0e..4e7739e2bbd 100644 --- a/r/src/config.cpp +++ b/r/src/config.cpp @@ -34,7 +34,7 @@ std::vector runtime_info() { return {info.simd_level, info.detected_simd_level}; } -// TODO(GH-48743): Remove when RTools upgrades to GCC with std::chrono timezone support +// TODO(GH-48743): Remove when RTools upgrades to libstdc++ with std::chrono timezone support // https://github.com/apache/arrow/issues/48743 // [[arrow::export]] void set_timezone_database(cpp11::strings path) { From 1def93532d1388b5616b57d5a7f593300781118d Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 7 Jan 2026 20:17:30 +0100 Subject: [PATCH 24/29] Review feedback --- cpp/src/arrow/testing/util.cc | 2 ++ cpp/src/arrow/testing/util.h | 2 ++ python/pyarrow/includes/libarrow.pxd | 1 + python/pyarrow/tests/test_compute.py | 10 ++++------ r/src/config.cpp | 4 ++-- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index 6c84f703f91..100c8bb3820 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -123,6 +123,7 @@ Status GetTestResourceRoot(std::string* out) { } // TODO(GH-48593): Remove when we have full std::chrono support +// on windows. // https://github.com/apache/arrow/issues/48593 std::optional GetTestTimezoneDatabaseRoot() { const char* c_root = std::getenv("ARROW_TIMEZONE_DATABASE"); @@ -133,6 +134,7 @@ std::optional GetTestTimezoneDatabaseRoot() { } // TODO(GH-48593): Remove when we have full std::chrono support +// on windows. 
// https://github.com/apache/arrow/issues/48593 Status InitTestTimezoneDatabase() { auto maybe_tzdata = GetTestTimezoneDatabaseRoot(); diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h index 0bddd642dc0..33ea0e37876 100644 --- a/cpp/src/arrow/testing/util.h +++ b/cpp/src/arrow/testing/util.h @@ -113,11 +113,13 @@ UnionTypeFactories() { ARROW_TESTING_EXPORT Status GetTestResourceRoot(std::string*); // TODO(GH-48593): Remove when we have full std::chrono support +// on windows. // https://github.com/apache/arrow/issues/48593 // Return the value of the ARROW_TIMEZONE_DATABASE environment variable ARROW_TESTING_EXPORT std::optional GetTestTimezoneDatabaseRoot(); // TODO(GH-48593): Remove when we have full std::chrono support +// on windows. // https://github.com/apache/arrow/issues/48593 // Set the Timezone database based on the ARROW_TIMEZONE_DATABASE env variable // This is only relevant on Windows, since other OSs have compatible databases built-in diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 617a4b76721..bc75a1ef7c6 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -91,6 +91,7 @@ cdef extern from "arrow/config.h" namespace "arrow" nogil: CRuntimeInfo GetRuntimeInfo() # TODO(GH-48593): Remove when libc++ supports std::chrono timezone + # on Windows. 
# https://github.com/apache/arrow/issues/48593 cdef cppclass CGlobalOptions" arrow::GlobalOptions": optional[c_string] timezone_db_path diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 425d612ef43..9223d5d649e 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2384,11 +2384,10 @@ def test_strftime(): # cast to the same type as result to ignore string vs large_string expected = pa.array(ts.strftime(fmt)).cast(result.type) if sys.platform == "win32" and fmt == "%Z": - # TODO(GH-48743): On Windows, std::chrono returns GMT + # TODO(GH-48767): On Windows, std::chrono returns GMT + # https://github.com/apache/arrow/issues/48767 # offset style (e.g. "GMT+1") instead of timezone # abbreviations (e.g. "CET") - # https://github.com/apache/arrow/issues/48743 - # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 for val in result: assert val.as_py() is None or val.as_py().startswith("GMT") \ or val.as_py() == "UTC" @@ -2408,9 +2407,8 @@ def test_strftime(): result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) expected = pa.array(ts.strftime(fmt + "%Z")).cast(result.type) if sys.platform == "win32": - # TODO(GH-48743): On Windows, std::chrono returns GMT offset style - # https://github.com/apache/arrow/issues/48743 - # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 + # TODO(GH-48767): On Windows, std::chrono returns GMT offset style + # https://github.com/apache/arrow/issues/48767 for val in result: assert val.as_py() is None or "GMT" in val.as_py() \ or "UTC" in val.as_py() diff --git a/r/src/config.cpp b/r/src/config.cpp index 4e7739e2bbd..9d37263c5e3 100644 --- a/r/src/config.cpp +++ b/r/src/config.cpp @@ -34,8 +34,8 @@ std::vector runtime_info() { return {info.simd_level, info.detected_simd_level}; } -// TODO(GH-48743): Remove when RTools upgrades to libstdc++ with std::chrono timezone support -// https://github.com/apache/arrow/issues/48743 +// TODO(GH-48743): 
Remove when RTools upgrades to libstdc++ with std::chrono timezone +// support https://github.com/apache/arrow/issues/48743 // [[arrow::export]] void set_timezone_database(cpp11::strings path) { auto paths = cpp11::as_cpp>(path); From 310bbc4ec0a0953e81f1f51b4bb2f4ab72e46f61 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 7 Jan 2026 21:18:03 +0100 Subject: [PATCH 25/29] doctest --- python/pyarrow/config.pxi | 7 ++++--- python/pyarrow/util.py | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/config.pxi b/python/pyarrow/config.pxi index f6cd1be4077..30281c6717c 100644 --- a/python/pyarrow/config.pxi +++ b/python/pyarrow/config.pxi @@ -104,13 +104,14 @@ def set_timezone_db_path(path): """ Configure the path to text timezone database on Windows. + .. deprecated:: 23.0.0 + This function is deprecated and will be removed in a future version. + PyArrow now uses the operating system's timezone database on Windows. + Parameters ---------- path : str Path to text timezone database. - .. deprecated:: 23.0.0 - This function is deprecated and will be removed in a future version. - PyArrow now uses the operating system's timezone database on Windows. """ warnings.warn( diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py index 2c64be63fbf..0909f586f6f 100644 --- a/python/pyarrow/util.py +++ b/python/pyarrow/util.py @@ -250,6 +250,7 @@ def download_tzdata_on_windows(): r""" Download and extract latest IANA timezone database into the location expected by Arrow which is %USERPROFILE%\Downloads\tzdata. + .. deprecated:: 23.0.0 This function is deprecated and will be removed in a future version. PyArrow now uses the operating system's timezone database on Windows. 
From 3374e5c31c02ddf087f56f2125e026fda7792c34 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 13 Jan 2026 20:31:34 +0100 Subject: [PATCH 26/29] Assume gcc13+ for R --- .github/workflows/cpp.yml | 4 +-- cpp/src/arrow/config.cc | 4 +-- docs/source/cpp/build_system.rst | 12 ++++++--- docs/source/developers/cpp/windows.rst | 15 +++++++---- docs/source/python/install.rst | 37 +++++++++++++------------- python/pyarrow/tests/test_compute.py | 7 ++--- r/DESCRIPTION | 1 - r/R/arrow-package.R | 23 ---------------- r/R/arrowExports.R | 4 --- r/src/arrowExports.cpp | 14 ++-------- r/src/config.cpp | 16 ----------- 11 files changed, 47 insertions(+), 90 deletions(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index f0d8d0b5767..02bc98a1cc3 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -370,9 +370,9 @@ jobs: if: matrix.msystem_upper == 'CLANG64' shell: bash run: | - # TODO(GH-48743): msys2 clang64 uses libc++ and vendored date.h library + # TODO(GH-48593): msys2 clang64 uses libc++ and vendored date.h library # which needs tzdata database to build Arrow with time zone support. 
- # https://github.com/apache/arrow/issues/48743 + # https://github.com/apache/arrow/issues/48593 ci/scripts/download_tz_database.sh - name: Download MinIO shell: msys2 {0} diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index 18b901aa94c..ad3a13657de 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -86,8 +86,8 @@ RuntimeInfo GetRuntimeInfo() { return info; } -// TODO(GH-48743): Remove when RTools upgrades to libstdc++ with std::chrono timezone -// support https://github.com/apache/arrow/issues/48743 +// TODO(GH-48593): Remove when libc++ supports std::chrono timezone +// https://github.com/apache/arrow/issues/48593 Status Initialize(const GlobalOptions& options) noexcept { if (options.timezone_db_path.has_value()) { #if !USE_OS_TZDB diff --git a/docs/source/cpp/build_system.rst b/docs/source/cpp/build_system.rst index 01dbe5e45f8..16b592de293 100644 --- a/docs/source/cpp/build_system.rst +++ b/docs/source/cpp/build_system.rst @@ -235,10 +235,14 @@ will manage consistent versions of Arrow and its dependencies. Runtime Dependencies ==================== -While Arrow uses the OS-provided timezone database on Linux and macOS, it -requires a user-provided database on Windows. You must download and extract the -text version of the IANA timezone database and add the Windows timezone mapping -XML. To download, you can use the following batch script: +On Linux and macOS, Arrow uses the OS-provided timezone database. On Windows, +Arrow uses the Windows timezone database when built with MSVC or recent MinGW GCC +(version 13+). However, when built with Clang/libc++ on Windows, Arrow requires +a user-provided IANA timezone database. + +To download the timezone database for libc++ builds, you must download and +extract the text version of the IANA timezone database and add the Windows +timezone mapping XML. To download, you can use the following batch script: .. 
literalinclude:: ../../../ci/appveyor-cpp-setup.bat :language: batch diff --git a/docs/source/developers/cpp/windows.rst b/docs/source/developers/cpp/windows.rst index 21bde92d0b7..26e00194d29 100644 --- a/docs/source/developers/cpp/windows.rst +++ b/docs/source/developers/cpp/windows.rst @@ -384,11 +384,16 @@ be defined, and similarly for ``-DARROW_FLIGHT_SQL=ON``. Downloading the Timezone Database ================================= -To run some of the compute unit tests on Windows, the IANA timezone database -and the Windows timezone mapping need to be downloaded first. See -:ref:`download-timezone-database` for download instructions. To set a non-default -path for the timezone database while running the unit tests, set the -``ARROW_TIMEZONE_DATABASE`` environment variable. +When building with MSVC or recent MinGW GCC (version 13+), Arrow uses the +Windows timezone database or the system-provided tzdata respectively, and +no additional setup is needed. + +When building with Clang/libc++ (e.g., MSYS2 Clang64), the IANA timezone +database and the Windows timezone mapping need to be downloaded first to run +some of the compute unit tests. See :ref:`download-timezone-database` for +download instructions. To set a non-default path for the timezone database +while running the unit tests, set the ``ARROW_TIMEZONE_DATABASE`` environment +variable. Replicating Appveyor Builds =========================== diff --git a/docs/source/python/install.rst b/docs/source/python/install.rst index c6f098ee20a..6c2a4d42142 100644 --- a/docs/source/python/install.rst +++ b/docs/source/python/install.rst @@ -83,35 +83,36 @@ and **pytz**, **dateutil** or **tzdata** package for timezones. tzdata on Windows ^^^^^^^^^^^^^^^^^ -While Arrow uses the OS-provided timezone database on Linux and macOS, it requires a -user-provided database on Windows. To download and extract the text version of +On Linux and macOS, Arrow uses the OS-provided timezone database. 
On Windows, +Arrow uses the Windows timezone database when built with MSVC or recent MinGW GCC +(version 13+), which covers most pre-built packages. No additional setup is needed +for these builds. + +However, when PyArrow is built with Clang/libc++ on Windows, a user-provided +IANA timezone database is required. To download and extract the text version of the IANA timezone database follow the instructions in the C++ -:ref:`download-timezone-database` or use pyarrow utility function -``pyarrow.util.download_tzdata_on_windows()`` that does the same. +:ref:`download-timezone-database` or use the (deprecated) pyarrow utility function +``pyarrow.util.download_tzdata_on_windows()``. By default, the timezone database will be detected at ``%USERPROFILE%\Downloads\tzdata``. If the database has been downloaded in a different location, you will need to set -a custom path to the database from Python: - -.. code-block:: python +a custom path to the database from Python using the (deprecated) +``pa.set_timezone_db_path("custom_path")`` function. - >>> import pyarrow as pa - >>> pa.set_timezone_db_path("custom_path") - -You may encounter problems writing datetime data to an ORC file if you install -pyarrow with pip. One possible solution to fix this problem: +.. note:: + You may encounter problems writing datetime data to an ORC file if you install + pyarrow with pip. One possible solution to fix this problem: 1. Install tzdata with ``pip install tzdata`` 2. Set the environment variable ``TZDIR = path\to\.venv\Lib\site-packages\tzdata\`` -You can find where ``tzdata`` is installed with the following python -command: + You can find where ``tzdata`` is installed with the following python command: -.. code-block:: python + .. code-block:: python - >>> import tzdata - >>> print(tzdata.__file__) - path\to\.venv\Lib\site-packages\tzdata\__init__.py + >>> import tzdata + >>> print(tzdata.__file__) + path\to\.venv\Lib\site-packages\tzdata\__init__.py .. 
_python-conda-differences: diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 9223d5d649e..80b1710c271 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2787,11 +2787,12 @@ def _check_temporal_rounding(ts, values, unit): np.testing.assert_array_equal(result, expected) -# TODO(GH-48743): Re-enable when GCC bug is fixed +# TODO(GH-48743): Re-enable when GCC libstdc++ bug is fixed # https://github.com/apache/arrow/issues/48743 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 -@pytest.mark.skipif(sys.platform == 'win32', - reason="Test triggers GCC timezone bug on Windows") +@pytest.mark.skipif( + sys.platform == 'win32' and pa.cpp_build_info.compiler_id == 'GNU', + reason="Test triggers GCC libstdc++ timezone bug on Windows") @pytest.mark.timezone_data @pytest.mark.parametrize('unit', ("nanosecond", "microsecond", "millisecond", "second", "minute", "hour", "day")) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 34d1f5a9f46..56cd089abff 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -70,7 +70,6 @@ Suggests: sys, testthat (>= 3.1.0), tibble, - tzdb, withr LinkingTo: cpp11 (>= 0.4.2) Collate: diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 0d1a92e12ad..9e0bfe77974 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -152,11 +152,6 @@ s3_finalizer <- new.env(parent = emptyenv()) # Disable multithreading on Windows # See https://issues.apache.org/jira/browse/ARROW-8379 options(arrow.use_threads = FALSE) - - # TODO(GH-48743): Remove when RTools upgrades to libstdc++ with std::chrono timezone support - # https://github.com/apache/arrow/issues/48743 - # Try to set timezone database for MinGW builds - configure_tzdb() } # Set interrupt handlers @@ -173,24 +168,6 @@ s3_finalizer <- new.env(parent = emptyenv()) invisible() } -# TODO(GH-48743): Remove when RTools upgrades to libstdc++ with std::chrono timezone support -# 
https://github.com/apache/arrow/issues/48743 -configure_tzdb <- function() { - # This is needed on Windows MinGW builds where std::chrono timezone support - # is not available (older GCC versions). The tzdb R package provides the - # IANA timezone database. - if (requireNamespace("tzdb", quietly = TRUE)) { - tzdb::tzdb_initialize() - set_timezone_database(tzdb::tzdb_path("text")) - } else { - msg <- paste( - "The tzdb package is not installed.", - "Timezones will not be available to Arrow compute functions." - ) - packageStartupMessage(msg) - } -} - .onAttach <- function(libname, pkgname) { # Just to be extra safe, let's wrap this in a try(); # we don't want a failed startup message to prevent the package from loading diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index a8387526b25..3f4d9aa4a87 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -552,10 +552,6 @@ runtime_info <- function() { .Call(`_arrow_runtime_info`) } -set_timezone_database <- function(path) { - invisible(.Call(`_arrow_set_timezone_database`, path)) -} - csv___WriteOptions__initialize <- function(options) { .Call(`_arrow_csv___WriteOptions__initialize`, options) } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 73bf81f83bb..bcf351c120f 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1382,15 +1382,6 @@ BEGIN_CPP11 return cpp11::as_sexp(runtime_info()); END_CPP11 } -// config.cpp -void set_timezone_database(cpp11::strings path); -extern "C" SEXP _arrow_set_timezone_database(SEXP path_sexp){ -BEGIN_CPP11 - arrow::r::Input::type path(path_sexp); - set_timezone_database(path); - return R_NilValue; -END_CPP11 -} // csv.cpp std::shared_ptr csv___WriteOptions__initialize(cpp11::list options); extern "C" SEXP _arrow_csv___WriteOptions__initialize(SEXP options_sexp){ @@ -5843,9 +5834,8 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_compute__GetFunctionNames", (DL_FUNC) &_arrow_compute__GetFunctionNames, 0}, { "_arrow_compute__Initialize", 
(DL_FUNC) &_arrow_compute__Initialize, 0}, { "_arrow_RegisterScalarUDF", (DL_FUNC) &_arrow_RegisterScalarUDF, 2}, - { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, - { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, - { "_arrow_set_timezone_database", (DL_FUNC) &_arrow_set_timezone_database, 1}, + { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, + { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, { "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1}, { "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, { "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, diff --git a/r/src/config.cpp b/r/src/config.cpp index 9d37263c5e3..1855f96ac6a 100644 --- a/r/src/config.cpp +++ b/r/src/config.cpp @@ -17,8 +17,6 @@ #include "./arrow_types.h" -#include - #include // [[arrow::export]] @@ -33,17 +31,3 @@ std::vector runtime_info() { auto info = arrow::GetRuntimeInfo(); return {info.simd_level, info.detected_simd_level}; } - -// TODO(GH-48743): Remove when RTools upgrades to libstdc++ with std::chrono timezone -// support https://github.com/apache/arrow/issues/48743 -// [[arrow::export]] -void set_timezone_database(cpp11::strings path) { - auto paths = cpp11::as_cpp>(path); - if (path.size() != 1) { - cpp11::stop("Must provide a single path to the timezone database."); - } - - arrow::GlobalOptions options; - options.timezone_db_path = std::make_optional(paths[0]); - arrow::StopIfNotOk(arrow::Initialize(options)); -} From ded386eb650aea2d246b759f2520963f7aa1d240 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 13 Jan 2026 21:21:17 +0100 Subject: [PATCH 27/29] deprecate some functions, set deprecation version to 24.0.0 --- cpp/src/arrow/config.cc | 2 ++ cpp/src/arrow/config.h | 15 ++++++++++++--- cpp/src/arrow/public_api_test.cc | 3 +++ cpp/src/arrow/testing/util.cc | 10 ++++------ cpp/src/arrow/testing/util.h | 16
++++++++-------- python/pyarrow/config.pxi | 4 ++-- python/pyarrow/tests/test_misc.py | 11 ++++++----- python/pyarrow/util.py | 4 ++-- 8 files changed, 39 insertions(+), 26 deletions(-) diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index ad3a13657de..9624471101f 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -88,6 +88,7 @@ RuntimeInfo GetRuntimeInfo() { // TODO(GH-48593): Remove when libc++ supports std::chrono timezone // https://github.com/apache/arrow/issues/48593 +ARROW_SUPPRESS_DEPRECATION_WARNING Status Initialize(const GlobalOptions& options) noexcept { if (options.timezone_db_path.has_value()) { #if !USE_OS_TZDB @@ -106,5 +107,6 @@ Status Initialize(const GlobalOptions& options) noexcept { } return Status::OK(); } +ARROW_UNSUPPRESS_DEPRECATION_WARNING } // namespace arrow diff --git a/cpp/src/arrow/config.h b/cpp/src/arrow/config.h index 617d6c268b5..b747d084f15 100644 --- a/cpp/src/arrow/config.h +++ b/cpp/src/arrow/config.h @@ -22,6 +22,7 @@ #include "arrow/status.h" #include "arrow/util/config.h" // IWYU pragma: export +#include "arrow/util/macros.h" #include "arrow/util/visibility.h" namespace arrow { @@ -86,12 +87,20 @@ const BuildInfo& GetBuildInfo(); ARROW_EXPORT RuntimeInfo GetRuntimeInfo(); -struct GlobalOptions { - /// Path to text timezone database. This is only configurable on Windows, - /// which does not have a compatible OS timezone database. +/// \deprecated Deprecated in 24.0.0. This struct is only needed for +/// Windows builds with Clang/libc++ and will be removed once libc++ +/// supports std::chrono timezones. +struct ARROW_DEPRECATED("Deprecated in 24.0.0. Only needed for Clang/libc++ on Windows.") + GlobalOptions { + /// Path to text timezone database. This is only configurable on Windows + /// builds using Clang/libc++ which require the vendored date library. std::optional timezone_db_path; }; +/// \deprecated Deprecated in 24.0.0. 
This function is only needed for +/// Windows builds with Clang/libc++ and will be removed once libc++ +/// supports std::chrono timezones. +ARROW_DEPRECATED("Deprecated in 24.0.0. Only needed for Clang/libc++ on Windows.") ARROW_EXPORT Status Initialize(const GlobalOptions& options) noexcept; diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index ccc80dc93a5..25482847a07 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -122,6 +122,8 @@ TEST(Misc, BuildInfo) { ASSERT_THAT(info.full_so_version, ::testing::HasSubstr(info.so_version)); } +// TODO(GH-48593): Remove when libc++ supports std::chrono timezones. +ARROW_SUPPRESS_DEPRECATION_WARNING TEST(Misc, SetTimezoneConfig) { #ifndef _WIN32 GTEST_SKIP() << "Can only set the Timezone database on Windows"; @@ -163,5 +165,6 @@ TEST(Misc, SetTimezoneConfig) { ASSERT_OK(arrow::Initialize(options)); #endif } +ARROW_UNSUPPRESS_DEPRECATION_WARNING } // namespace arrow diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index 100c8bb3820..9ead3654e18 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -122,9 +122,8 @@ Status GetTestResourceRoot(std::string* out) { return Status::OK(); } -// TODO(GH-48593): Remove when we have full std::chrono support -// on windows. -// https://github.com/apache/arrow/issues/48593 +// TODO(GH-48593): Remove when libc++ supports std::chrono timezones. +ARROW_SUPPRESS_DEPRECATION_WARNING std::optional GetTestTimezoneDatabaseRoot() { const char* c_root = std::getenv("ARROW_TIMEZONE_DATABASE"); if (!c_root) { @@ -133,9 +132,7 @@ std::optional GetTestTimezoneDatabaseRoot() { return std::make_optional(std::string(c_root)); } -// TODO(GH-48593): Remove when we have full std::chrono support -// on windows. -// https://github.com/apache/arrow/issues/48593 +// TODO(GH-48593): Remove when libc++ supports std::chrono timezones. 
Status InitTestTimezoneDatabase() { auto maybe_tzdata = GetTestTimezoneDatabaseRoot(); // If missing, timezone database will default to %USERPROFILE%\Downloads\tzdata @@ -146,6 +143,7 @@ Status InitTestTimezoneDatabase() { ARROW_RETURN_NOT_OK(arrow::Initialize(options)); return Status::OK(); } +ARROW_UNSUPPRESS_DEPRECATION_WARNING int GetListenPort() { // Get a new available port number by binding a socket to an ephemeral port diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h index 33ea0e37876..4069bd84281 100644 --- a/cpp/src/arrow/testing/util.h +++ b/cpp/src/arrow/testing/util.h @@ -112,17 +112,17 @@ UnionTypeFactories() { // Status ARROW_TESTING_EXPORT Status GetTestResourceRoot(std::string*); -// TODO(GH-48593): Remove when we have full std::chrono support -// on windows. -// https://github.com/apache/arrow/issues/48593 +/// \deprecated Deprecated in 24.0.0. Only needed for Clang/libc++ on Windows. +// TODO(GH-48593): Remove when libc++ supports std::chrono timezones. // Return the value of the ARROW_TIMEZONE_DATABASE environment variable +ARROW_DEPRECATED("Deprecated in 24.0.0. Only needed for Clang/libc++ on Windows.") ARROW_TESTING_EXPORT std::optional GetTestTimezoneDatabaseRoot(); -// TODO(GH-48593): Remove when we have full std::chrono support -// on windows. -// https://github.com/apache/arrow/issues/48593 -// Set the Timezone database based on the ARROW_TIMEZONE_DATABASE env variable -// This is only relevant on Windows, since other OSs have compatible databases built-in +/// \deprecated Deprecated in 24.0.0. Only needed for Clang/libc++ on Windows. +// TODO(GH-48593): Remove when libc++ supports std::chrono timezones. +// Set the Timezone database based on the ARROW_TIMEZONE_DATABASE env variable. +// Only relevant for Windows builds with Clang/libc++ which use vendored date library. +ARROW_DEPRECATED("Deprecated in 24.0.0. 
Only needed for Clang/libc++ on Windows.") ARROW_TESTING_EXPORT Status InitTestTimezoneDatabase(); // Get a TCP port number to listen on. This is a different number every time, diff --git a/python/pyarrow/config.pxi b/python/pyarrow/config.pxi index 30281c6717c..3af14f6c145 100644 --- a/python/pyarrow/config.pxi +++ b/python/pyarrow/config.pxi @@ -104,7 +104,7 @@ def set_timezone_db_path(path): """ Configure the path to text timezone database on Windows. - .. deprecated:: 23.0.0 + .. deprecated:: 24.0.0 This function is deprecated and will be removed in a future version. PyArrow now uses the operating system's timezone database on Windows. @@ -115,7 +115,7 @@ def set_timezone_db_path(path): """ warnings.warn( - "pyarrow.set_timezone_db_path is deprecated as of 23.0.0 " + "pyarrow.set_timezone_db_path is deprecated as of 24.0.0 " "and will be removed in a future version. PyArrow now uses the " "operating system's timezone database on Windows.", FutureWarning, diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index af017234703..5e229241f5b 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -140,11 +140,12 @@ def import_arrow(): # TODO(GH-48593): Remove when libc++ supports std::chrono timezone # https://github.com/apache/arrow/issues/48593 -@pytest.mark.skipif(sys.platform == "win32", - reason="Path to timezone database is not configurable " - "on non-Windows platforms") -def test_set_timezone_db_path_non_windows(): - # set_timezone_db_path raises an error on non-Windows platforms +@pytest.mark.skipif( + sys.platform == "win32" and pa.cpp_build_info.compiler_id == "Clang", + reason="Path to timezone database is configurable on Windows with Clang/libc++") +def test_set_timezone_db_path_raises_with_os_tzdb(): + # set_timezone_db_path raises an error when Arrow uses OS timezone database + # (non-Windows, or Windows with MSVC/GCC which use std::chrono) with pytest.raises(ArrowInvalid, match="Arrow 
was set to use OS timezone " "database at compile time"): diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py index 0909f586f6f..4897b0893f5 100644 --- a/python/pyarrow/util.py +++ b/python/pyarrow/util.py @@ -251,13 +251,13 @@ def download_tzdata_on_windows(): Download and extract latest IANA timezone database into the location expected by Arrow which is %USERPROFILE%\Downloads\tzdata. - .. deprecated:: 23.0.0 + .. deprecated:: 24.0.0 This function is deprecated and will be removed in a future version. PyArrow now uses the operating system's timezone database on Windows. """ warnings.warn( - "pyarrow.util.download_tzdata_on_windows is deprecated as of 23.0.0 " + "pyarrow.util.download_tzdata_on_windows is deprecated as of 24.0.0 " "and will be removed in a future version. PyArrow now uses the " "operating system's timezone database on Windows.", FutureWarning, From 5b0abceb45dac6a2d1cb777cada9d7aa91337e81 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 13 Jan 2026 21:26:21 +0100 Subject: [PATCH 28/29] skip a test --- python/pyarrow/tests/test_compute.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 80b1710c271..981091f2077 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2787,12 +2787,11 @@ def _check_temporal_rounding(ts, values, unit): np.testing.assert_array_equal(result, expected) -# TODO(GH-48743): Re-enable when GCC libstdc++ bug is fixed +# TODO(GH-48743): Re-enable when Windows timezone issues are resolved # https://github.com/apache/arrow/issues/48743 -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116110 @pytest.mark.skipif( - sys.platform == 'win32' and pa.cpp_build_info.compiler_id == 'GNU', - reason="Test triggers GCC libstdc++ timezone bug on Windows") + sys.platform == 'win32', + reason="Timezone rounding tests have platform-specific issues on Windows") @pytest.mark.timezone_data 
@pytest.mark.parametrize('unit', ("nanosecond", "microsecond", "millisecond", "second", "minute", "hour", "day")) From cecf3e6cf0f5b3023c6f691439955e38cd0725aa Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 13 Jan 2026 21:47:44 +0100 Subject: [PATCH 29/29] skip a test --- python/pyarrow/tests/test_misc.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index 5e229241f5b..15a5a0cc4b3 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -141,11 +141,10 @@ def import_arrow(): # TODO(GH-48593): Remove when libc++ supports std::chrono timezone # https://github.com/apache/arrow/issues/48593 @pytest.mark.skipif( - sys.platform == "win32" and pa.cpp_build_info.compiler_id == "Clang", - reason="Path to timezone database is configurable on Windows with Clang/libc++") + sys.platform == "win32", + reason="Timezone database path behavior varies by Windows build configuration") def test_set_timezone_db_path_raises_with_os_tzdb(): # set_timezone_db_path raises an error when Arrow uses OS timezone database - # (non-Windows, or Windows with MSVC/GCC which use std::chrono) with pytest.raises(ArrowInvalid, match="Arrow was set to use OS timezone " "database at compile time"):