From e67b13b37f7f21300e135b3ccb84ed6230114c71 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 9 Sep 2021 14:38:37 +0200 Subject: [PATCH] ARROW-13842: [C++] Bump vendored date library This fixed an undefined behaviour issue on extreme timestamp values. --- cpp/src/arrow/pretty_print_test.cc | 7 +- cpp/src/arrow/vendored/datetime/README.md | 9 +- cpp/src/arrow/vendored/datetime/date.h | 466 +++++++++++++++---- cpp/src/arrow/vendored/datetime/ios.h | 4 +- cpp/src/arrow/vendored/datetime/ios.mm | 110 ++--- cpp/src/arrow/vendored/datetime/tz.cpp | 252 ++++++---- cpp/src/arrow/vendored/datetime/tz.h | 99 ++-- cpp/src/arrow/vendored/datetime/tz_private.h | 8 +- 8 files changed, 660 insertions(+), 295 deletions(-) diff --git a/cpp/src/arrow/pretty_print_test.cc b/cpp/src/arrow/pretty_print_test.cc index d01e5377d07..7ab38d2c056 100644 --- a/cpp/src/arrow/pretty_print_test.cc +++ b/cpp/src/arrow/pretty_print_test.cc @@ -451,9 +451,9 @@ TEST_F(TestPrettyPrint, DateTimeTypesWithOutOfRangeValues) { CheckPrimitive(timestamp(TimeUnit::MICRO), {0, 10}, is_valid, values, expected); } -#ifndef ARROW_UBSAN - // While the values below are legal and correct, they trigger an internal - // signed overflow inside the arrow_vendored::date library. + // Note that while the values below are legal and correct, they used to + // trigger an internal signed overflow inside the vendored "date" library + // (https://github.com/HowardHinnant/date/issues/696). { std::vector values = {min_int64, max_int64}; static const char* expected = R"expected([ @@ -463,7 +463,6 @@ TEST_F(TestPrettyPrint, DateTimeTypesWithOutOfRangeValues) { CheckPrimitive(timestamp(TimeUnit::NANO), {0, 10}, {true, true}, values, expected); } -#endif } TEST_F(TestPrettyPrint, StructTypeBasic) { diff --git a/cpp/src/arrow/vendored/datetime/README.md b/cpp/src/arrow/vendored/datetime/README.md index 811b6935ff2..cff53e7e307 100644 --- a/cpp/src/arrow/vendored/datetime/README.md +++ b/cpp/src/arrow/vendored/datetime/README.md @@ -17,5 +17,12 @@ copies or substantial portions of the Software. Sources for datetime are adapted from Howard Hinnant's date library (https://github.com/HowardHinnant/date). -Sources are taken from v3.0.0 release of the above project. +Sources are taken from changeset 2e19c006e2218447ee31f864191859517603f59f +of the above project. +The following changes are made: +- fix internal inclusion paths (from "date/xxx.h" to simply "xxx.h") +- enclose the `date` namespace inside the `arrow_vendored` namespace +- include a custom "visibility.h" header from "tz.cpp" for proper DLL + exports on Windows +- disable curl-based database downloading in "tz.h" diff --git a/cpp/src/arrow/vendored/datetime/date.h b/cpp/src/arrow/vendored/datetime/date.h index 6d0455a354b..3b38b263a8f 100644 --- a/cpp/src/arrow/vendored/datetime/date.h +++ b/cpp/src/arrow/vendored/datetime/date.h @@ -45,9 +45,7 @@ #include #include #include -#if !(__cplusplus >= 201402) -# include -#endif +#include #include #include #include @@ -140,7 +138,7 @@ namespace date #endif #ifndef HAS_UNCAUGHT_EXCEPTIONS -# if __cplusplus > 201703 || (defined(_MSVC_LANG) && _MSVC_LANG > 201703L) +# if __cplusplus >= 201703 || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) # define HAS_UNCAUGHT_EXCEPTIONS 1 # else # define HAS_UNCAUGHT_EXCEPTIONS 0 @@ -1008,6 +1006,8 @@ struct is_clock inline constexpr bool is_clock_v = is_clock::value; + #endif // HAS_VOID_T //----------------+ @@ -1024,6 +1024,7 @@ class save_istream std::basic_ios& is_; CharT fill_; std::ios::fmtflags flags_; + std::streamsize precision_; std::streamsize width_; std::basic_ostream* tie_; std::locale loc_; @@ -1033,6 +1034,7 @@ class save_istream { is_.fill(fill_); is_.flags(flags_); + is_.precision(precision_); is_.width(width_); is_.imbue(loc_); is_.tie(tie_); @@ -1045,6 +1047,7 @@ class save_istream : is_(is) , fill_(is.fill()) , flags_(is.flags()) + , precision_(is.precision()) , width_(is.width(0)) , tie_(is.tie(nullptr)) , loc_(is.getloc()) @@ -1168,7 +1171,11 @@ struct no_overflow static const std::intmax_t d1 = R1::den / gcd_d1_d2; static const std::intmax_t n2 = R2::num / gcd_n1_n2; static const std::intmax_t d2 = R2::den / gcd_d1_d2; +#ifdef __cpp_constexpr static const std::intmax_t max = std::numeric_limits::max(); +#else + static const std::intmax_t max = LLONG_MAX; +#endif template struct mul // overflow == false @@ -1356,6 +1363,47 @@ using std::chrono::abs; #endif // HAS_CHRONO_ROUNDING +namespace detail +{ + +template +CONSTCD14 +inline +typename std::enable_if +< + !std::chrono::treat_as_floating_point::value, + To +>::type +round_i(const std::chrono::duration& d) +{ + return round(d); +} + +template +CONSTCD14 +inline +typename std::enable_if +< + std::chrono::treat_as_floating_point::value, + To +>::type +round_i(const std::chrono::duration& d) +{ + return d; +} + +template +CONSTCD11 +inline +std::chrono::time_point +round_i(const std::chrono::time_point& tp) +{ + using std::chrono::time_point; + return time_point{round_i(tp.time_since_epoch())}; +} + +} // detail + // trunc towards zero template CONSTCD11 @@ -1460,16 +1508,29 @@ operator-(const day& x, const days& y) NOEXCEPT return x + -y; } +namespace detail +{ + template -inline std::basic_ostream& -operator<<(std::basic_ostream& os, const day& d) +low_level_fmt(std::basic_ostream& os, const day& d) { detail::save_ostream _(os); os.fill('0'); os.flags(std::ios::dec | std::ios::right); os.width(2); os << static_cast(d); + return os; +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const day& d) +{ + detail::low_level_fmt(os, d); if (!d.ok()) os << " is not a valid day"; return os; @@ -1587,10 +1648,12 @@ operator-(const month& x, const months& y) NOEXCEPT return x + -y; } +namespace detail +{ + template -inline std::basic_ostream& -operator<<(std::basic_ostream& os, const month& m) +low_level_fmt(std::basic_ostream& os, const month& m) { if (m.ok()) { @@ -1598,7 +1661,20 @@ operator<<(std::basic_ostream& os, const month& m) os << format(os.getloc(), fmt, m); } else - os << static_cast(m) << " is not a valid month"; + os << static_cast(m); + return os; +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const month& m) +{ + detail::low_level_fmt(os, m); + if (!m.ok()) + os << " is not a valid month"; return os; } @@ -1712,10 +1788,12 @@ operator-(const year& x, const years& y) NOEXCEPT return year{static_cast(x) - y.count()}; } +namespace detail +{ + template -inline std::basic_ostream& -operator<<(std::basic_ostream& os, const year& y) +low_level_fmt(std::basic_ostream& os, const year& y) { detail::save_ostream _(os); os.fill('0'); @@ -1723,6 +1801,17 @@ operator<<(std::basic_ostream& os, const year& y) os.width(4 + (y < year{0})); os.imbue(std::locale::classic()); os << static_cast(y); + return os; +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const year& y) +{ + detail::low_level_fmt(os, y); if (!y.ok()) os << " is not a valid year"; return os; @@ -1848,10 +1937,12 @@ operator-(const weekday& x, const days& y) NOEXCEPT return x + -y; } +namespace detail +{ + template -inline std::basic_ostream& -operator<<(std::basic_ostream& os, const weekday& wd) +low_level_fmt(std::basic_ostream& os, const weekday& wd) { if (wd.ok()) { @@ -1859,7 +1950,20 @@ operator<<(std::basic_ostream& os, const weekday& wd) os << format(fmt, wd); } else - os << static_cast(wd.wd_) << " is not a valid weekday"; + os << wd.c_encoding(); + return os; +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const weekday& wd) +{ + detail::low_level_fmt(os, wd); + if (!wd.ok()) + os << " is not a valid weekday"; return os; } @@ -1968,15 +2072,26 @@ weekday_indexed::weekday_indexed(const date::weekday& wd, unsigned index) NOEXCE # pragma GCC diagnostic pop #endif // __GNUC__ +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const weekday_indexed& wdi) +{ + return low_level_fmt(os, wdi.weekday()) << '[' << wdi.index() << ']'; +} + +} // namespace detail + template inline std::basic_ostream& operator<<(std::basic_ostream& os, const weekday_indexed& wdi) { - os << wdi.weekday() << '[' << wdi.index(); - if (!(1 <= wdi.index() && wdi.index() <= 5)) - os << " is not a valid index"; - os << ']'; + detail::low_level_fmt(os, wdi); + if (!wdi.ok()) + os << " is not a valid weekday_indexed"; return os; } @@ -2026,12 +2141,27 @@ operator!=(const weekday_last& x, const weekday_last& y) NOEXCEPT return !(x == y); } +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const weekday_last& wdl) +{ + return low_level_fmt(os, wdl.weekday()) << "[last]"; +} + +} // namespace detail + template inline std::basic_ostream& operator<<(std::basic_ostream& os, const weekday_last& wdl) { - return os << wdl.weekday() << "[last]"; + detail::low_level_fmt(os, wdl); + if (!wdl.ok()) + os << " is not a valid weekday_last"; + return os; } CONSTCD11 @@ -2206,12 +2336,28 @@ operator-(const year_month& ym, const years& dy) NOEXCEPT return ym + -dy; } +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const year_month& ym) +{ + low_level_fmt(os, ym.year()) << '/'; + return low_level_fmt(os, ym.month()); +} + +} // namespace detail + template inline std::basic_ostream& operator<<(std::basic_ostream& os, const year_month& ym) { - return os << ym.year() << '/' << ym.month(); + detail::low_level_fmt(os, ym); + if (!ym.ok()) + os << " is not a valid year_month"; + return os; } // month_day @@ -2291,12 +2437,28 @@ operator>=(const month_day& x, const month_day& y) NOEXCEPT return !(x < y); } +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const month_day& md) +{ + low_level_fmt(os, md.month()) << '/'; + return low_level_fmt(os, md.day()); +} + +} // namespace detail + template inline std::basic_ostream& operator<<(std::basic_ostream& os, const month_day& md) { - return os << md.month() << '/' << md.day(); + detail::low_level_fmt(os, md); + if (!md.ok()) + os << " is not a valid month_day"; + return os; } // month_day_last @@ -2353,12 +2515,27 @@ operator>=(const month_day_last& x, const month_day_last& y) NOEXCEPT return !(x < y); } +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const month_day_last& mdl) +{ + return low_level_fmt(os, mdl.month()) << "/last"; +} + +} // namespace detail + template inline std::basic_ostream& operator<<(std::basic_ostream& os, const month_day_last& mdl) { - return os << mdl.month() << "/last"; + detail::low_level_fmt(os, mdl); + if (!mdl.ok()) + os << " is not a valid month_day_last"; + return os; } // month_weekday @@ -2405,12 +2582,28 @@ operator!=(const month_weekday& x, const month_weekday& y) NOEXCEPT return !(x == y); } +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const month_weekday& mwd) +{ + low_level_fmt(os, mwd.month()) << '/'; + return low_level_fmt(os, mwd.weekday_indexed()); +} + +} // namespace detail + template inline std::basic_ostream& operator<<(std::basic_ostream& os, const month_weekday& mwd) { - return os << mwd.month() << '/' << mwd.weekday_indexed(); + detail::low_level_fmt(os, mwd); + if (!mwd.ok()) + os << " is not a valid month_weekday"; + return os; } // month_weekday_last @@ -2457,12 +2650,28 @@ operator!=(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT return !(x == y); } +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const month_weekday_last& mwdl) +{ + low_level_fmt(os, mwdl.month()) << '/'; + return low_level_fmt(os, mwdl.weekday_last()); +} + +} // namespace detail + template inline std::basic_ostream& operator<<(std::basic_ostream& os, const month_weekday_last& mwdl) { - return os << mwdl.month() << '/' << mwdl.weekday_last(); + detail::low_level_fmt(os, mwdl); + if (!mwdl.ok()) + os << " is not a valid month_weekday_last"; + return os; } // year_month_day_last @@ -2612,12 +2821,28 @@ operator>=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT return !(x < y); } +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const year_month_day_last& ymdl) +{ + low_level_fmt(os, ymdl.year()) << '/'; + return low_level_fmt(os, ymdl.month_day_last()); +} + +} // namespace detail + template inline std::basic_ostream& operator<<(std::basic_ostream& os, const year_month_day_last& ymdl) { - return os << ymdl.year() << '/' << ymdl.month_day_last(); + detail::low_level_fmt(os, ymdl); + if (!ymdl.ok()) + os << " is not a valid year_month_day_last"; + return os; } template @@ -2848,12 +3073,13 @@ operator<<(std::basic_ostream& os, const year_month_day& ymd) os.fill('0'); os.flags(std::ios::dec | std::ios::right); os.imbue(std::locale::classic()); - os << ymd.year() << '-'; + os << static_cast(ymd.year()) << '-'; os.width(2); os << static_cast(ymd.month()) << '-'; - os << ymd.day(); + os.width(2); + os << static_cast(ymd.day()); if (!ymd.ok()) - os << " is not a valid date"; + os << " is not a valid year_month_day"; return os; } @@ -3089,8 +3315,12 @@ inline std::basic_ostream& operator<<(std::basic_ostream& os, const year_month_weekday& ymwdi) { - return os << ymwdi.year() << '/' << ymwdi.month() - << '/' << ymwdi.weekday_indexed(); + detail::low_level_fmt(os, ymwdi.year()) << '/'; + detail::low_level_fmt(os, ymwdi.month()) << '/'; + detail::low_level_fmt(os, ymwdi.weekday_indexed()); + if (!ymwdi.ok()) + os << " is not a valid year_month_weekday"; + return os; } template @@ -3266,7 +3496,12 @@ inline std::basic_ostream& operator<<(std::basic_ostream& os, const year_month_weekday_last& ymwdl) { - return os << ymwdl.year() << '/' << ymwdl.month() << '/' << ymwdl.weekday_last(); + detail::low_level_fmt(os, ymwdl.year()) << '/'; + detail::low_level_fmt(os, ymwdl.month()) << '/'; + detail::low_level_fmt(os, ymwdl.weekday_last()); + if (!ymwdl.ok()) + os << " is not a valid year_month_weekday_last"; + return os; } template @@ -3683,11 +3918,12 @@ struct undocumented {explicit undocumented() = default;}; // Example: width<4>::value == 2 // Example: width<10>::value == 1 // Example: width<1000>::value == 3 -template +template struct width { - static CONSTDATA unsigned value = 1 + width::value; + static_assert(d > 0, "width called with zero denominator"); + static CONSTDATA unsigned value = 1 + width::value; }; template @@ -3716,9 +3952,10 @@ class decimal_format_seconds { using CT = typename std::common_type::type; using rep = typename CT::rep; + static unsigned CONSTDATA trial_width = + detail::width::value; public: - static unsigned constexpr width = detail::width::value < 19 ? - detail::width::value : 6u; + static unsigned CONSTDATA width = trial_width < 19 ? trial_width : 6u; using precision = std::chrono::duration::value>>; @@ -3767,6 +4004,7 @@ class decimal_format_seconds std::chrono::duration d = s_ + sub_s_; if (d < std::chrono::seconds{10}) os << '0'; + os.precision(width+6); os << std::fixed << d.count(); return os; } @@ -3957,9 +4195,7 @@ make24(std::chrono::hours h, bool is_pm) NOEXCEPT template using time_of_day = hh_mm_ss; -template ::value>::type> +template CONSTCD11 inline hh_mm_ss> @@ -3972,8 +4208,7 @@ template inline typename std::enable_if < - !std::chrono::treat_as_floating_point::value && - std::ratio_less::value + std::ratio_less::value , std::basic_ostream& >::type operator<<(std::basic_ostream& os, const sys_time& tp) @@ -4554,7 +4789,11 @@ struct fields hh_mm_ss tod{}; bool has_tod = false; +#if !defined(__clang__) && defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ <= 409) + fields() : ymd{nanyear/0/0}, wd{8u}, tod{}, has_tod{false} {} +#else fields() = default; +#endif fields(year_month_day ymd_) : ymd(ymd_) {} fields(weekday wd_) : wd(wd_) {} @@ -4736,7 +4975,7 @@ scan_keyword(std::basic_istream& is, FwdIter kb, FwdIter ke) is.setstate(std::ios::eofbit); break; } - auto c = static_cast(toupper(ic)); + auto c = static_cast(toupper(static_cast(ic))); bool consume = false; // For each keyword which might match, see if the indx character is c // If a match if found, consume c @@ -4749,7 +4988,7 @@ scan_keyword(std::basic_istream& is, FwdIter kb, FwdIter ke) { if (*st == might_match) { - if (c == static_cast(toupper((*ky)[indx]))) + if (c == static_cast(toupper(static_cast((*ky)[indx])))) { consume = true; if (ky->size() == indx+1) @@ -5976,8 +6215,13 @@ to_stream(std::basic_ostream& os, const CharT* fmt, const std::chrono::seconds* offset_sec = nullptr) { using CT = typename std::common_type::type; - auto ld = floor(tp); - fields fds{year_month_day{ld}, hh_mm_ss{tp-local_seconds{ld}}}; + auto ld = std::chrono::time_point_cast(tp); + fields fds; + if (ld <= tp) + fds = fields{year_month_day{ld}, hh_mm_ss{tp-local_seconds{ld}}}; + else + fds = fields{year_month_day{ld - days{1}}, + hh_mm_ss{days{1} - (local_seconds{ld} - tp)}}; return to_stream(os, fmt, fds, abbrev, offset_sec); } @@ -5990,8 +6234,13 @@ to_stream(std::basic_ostream& os, const CharT* fmt, using CT = typename std::common_type::type; const std::string abbrev("UTC"); CONSTDATA seconds offset{0}; - auto sd = floor(tp); - fields fds{year_month_day{sd}, hh_mm_ss{tp-sys_seconds{sd}}}; + auto sd = std::chrono::time_point_cast(tp); + fields fds; + if (sd <= tp) + fds = fields{year_month_day{sd}, hh_mm_ss{tp-sys_seconds{sd}}}; + else + fds = fields{year_month_day{sd - days{1}}, + hh_mm_ss{days{1} - (sys_seconds{sd} - tp)}}; return to_stream(os, fmt, fds, &abbrev, &offset); } @@ -6124,9 +6373,16 @@ long double read_long_double(std::basic_istream& is, unsigned m = 1, unsigned M = 10) { unsigned count = 0; + unsigned fcount = 0; + unsigned long long i = 0; + unsigned long long f = 0; + bool parsing_fraction = false; +#if ONLY_C_LOCALE + typename Traits::int_type decimal_point = '.'; +#else auto decimal_point = Traits::to_int_type( std::use_facet>(is.getloc()).decimal_point()); - std::string buf; +#endif while (true) { auto ic = is.peek(); @@ -6134,18 +6390,25 @@ read_long_double(std::basic_istream& is, unsigned m = 1, unsigned break; if (Traits::eq_int_type(ic, decimal_point)) { - buf += '.'; decimal_point = Traits::eof(); - is.get(); + parsing_fraction = true; } else { auto c = static_cast(Traits::to_char_type(ic)); if (!('0' <= c && c <= '9')) break; - buf += c; - (void)is.get(); + if (!parsing_fraction) + { + i = 10*i + static_cast(c - '0'); + } + else + { + f = 10*f + static_cast(c - '0'); + ++fcount; + } } + (void)is.get(); if (++count == M) break; } @@ -6154,7 +6417,7 @@ read_long_double(std::basic_istream& is, unsigned m = 1, unsigned is.setstate(std::ios::failbit); return 0; } - return std::stold(buf); + return static_cast(i) + static_cast(f)/std::pow(10.L, fcount); } struct rs @@ -6311,6 +6574,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, using std::chrono::seconds; using std::chrono::minutes; using std::chrono::hours; + using detail::round_i; typename std::basic_istream::sentry ok{is, true}; if (ok) { @@ -6326,7 +6590,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, auto modified = CharT{}; auto width = -1; - CONSTDATA int not_a_year = numeric_limits::min(); + CONSTDATA int not_a_year = numeric_limits::min(); CONSTDATA int not_a_2digit_year = 100; CONSTDATA int not_a_century = not_a_year / 100; CONSTDATA int not_a_month = 0; @@ -6519,12 +6783,12 @@ from_stream(std::basic_istream& is, const CharT* fmt, CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; int tH; int tM; - long double S; + long double S{}; read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2}, CharT{':'}, rld{S, 1, w}); checked_set(H, tH, not_a_hour, is); checked_set(M, tM, not_a_minute, is); - checked_set(s, round(duration{S}), + checked_set(s, round_i(duration{S}), not_a_second, is); ws(is); int tY = not_a_year; @@ -6599,12 +6863,12 @@ from_stream(std::basic_istream& is, const CharT* fmt, CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; int tH = not_a_hour; int tM = not_a_minute; - long double S; + long double S{}; read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2}, CharT{':'}, rld{S, 1, w}); checked_set(H, tH, not_a_hour, is); checked_set(M, tM, not_a_minute, is); - checked_set(s, round(duration{S}), + checked_set(s, round_i(duration{S}), not_a_second, is); #endif } @@ -6920,7 +7184,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, #else auto nm = detail::ampm_names(); auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; - tp = i; + tp = static_cast(i); #endif checked_set(p, tp, not_a_ampm, is); } @@ -6954,14 +7218,14 @@ from_stream(std::basic_istream& is, const CharT* fmt, // "%I:%M:%S %p" using dfs = detail::decimal_format_seconds; CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; - long double S; + long double S{}; int tI = not_a_hour_12_value; int tM = not_a_minute; read(is, ru{tI, 1, 2}, CharT{':'}, ru{tM, 1, 2}, CharT{':'}, rld{S, 1, w}); checked_set(I, tI, not_a_hour_12_value, is); checked_set(M, tM, not_a_minute, is); - checked_set(s, round(duration{S}), + checked_set(s, round_i(duration{S}), not_a_second, is); ws(is); auto nm = detail::ampm_names(); @@ -7010,9 +7274,9 @@ from_stream(std::basic_istream& is, const CharT* fmt, { using dfs = detail::decimal_format_seconds; CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; - long double S; + long double S{}; read(is, rld{S, 1, width == -1 ? w : static_cast(width)}); - checked_set(s, round(duration{S}), + checked_set(s, round_i(duration{S}), not_a_second, is); } #if !ONLY_C_LOCALE @@ -7044,12 +7308,12 @@ from_stream(std::basic_istream& is, const CharT* fmt, CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; int tH = not_a_hour; int tM = not_a_minute; - long double S; + long double S{}; read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2}, CharT{':'}, rld{S, 1, w}); checked_set(H, tH, not_a_hour, is); checked_set(M, tM, not_a_minute, is); - checked_set(s, round(duration{S}), + checked_set(s, round_i(duration{S}), not_a_second, is); } else @@ -7507,11 +7771,11 @@ from_stream(std::basic_istream& is, const CharT* fmt, if (j != not_a_doy && Y != not_a_year) { auto ymd_trial = year_month_day{local_days(year{Y}/1/1) + days{j-1}}; - if (m == 0) + if (m == not_a_month) m = static_cast(static_cast(ymd_trial.month())); else if (month(static_cast(m)) != ymd_trial.month()) goto broken; - if (d == 0) + if (d == not_a_day) d = static_cast(static_cast(ymd_trial.day())); else if (day(static_cast(d)) != ymd_trial.day()) goto broken; @@ -7597,6 +7861,8 @@ from_stream(std::basic_istream& is, const CharT* fmt, goto broken; } } + else // I is ambiguous, AM or PM? + goto broken; } } if (H != not_a_hour) @@ -7641,7 +7907,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, year& y, { using CT = std::chrono::seconds; fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); + date::from_stream(is, fmt, fds, abbrev, offset); if (!fds.ymd.year().ok()) is.setstate(std::ios::failbit); if (!is.fail()) @@ -7657,7 +7923,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, month& m, { using CT = std::chrono::seconds; fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); + date::from_stream(is, fmt, fds, abbrev, offset); if (!fds.ymd.month().ok()) is.setstate(std::ios::failbit); if (!is.fail()) @@ -7673,7 +7939,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, day& d, { using CT = std::chrono::seconds; fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); + date::from_stream(is, fmt, fds, abbrev, offset); if (!fds.ymd.day().ok()) is.setstate(std::ios::failbit); if (!is.fail()) @@ -7689,7 +7955,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, weekday& wd { using CT = std::chrono::seconds; fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); + date::from_stream(is, fmt, fds, abbrev, offset); if (!fds.wd.ok()) is.setstate(std::ios::failbit); if (!is.fail()) @@ -7705,7 +7971,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, year_month& { using CT = std::chrono::seconds; fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); + date::from_stream(is, fmt, fds, abbrev, offset); if (!fds.ymd.month().ok()) is.setstate(std::ios::failbit); if (!is.fail()) @@ -7721,7 +7987,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, month_day& { using CT = std::chrono::seconds; fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); + date::from_stream(is, fmt, fds, abbrev, offset); if (!fds.ymd.month().ok() || !fds.ymd.day().ok()) is.setstate(std::ios::failbit); if (!is.fail()) @@ -7737,7 +8003,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, { using CT = std::chrono::seconds; fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); + date::from_stream(is, fmt, fds, abbrev, offset); if (!fds.ymd.ok()) is.setstate(std::ios::failbit); if (!is.fail()) @@ -7752,15 +8018,16 @@ from_stream(std::basic_istream& is, const CharT* fmt, std::chrono::minutes* offset = nullptr) { using CT = typename std::common_type::type; + using detail::round_i; std::chrono::minutes offset_local{}; auto offptr = offset ? offset : &offset_local; fields fds{}; fds.has_tod = true; - from_stream(is, fmt, fds, abbrev, offptr); + date::from_stream(is, fmt, fds, abbrev, offptr); if (!fds.ymd.ok() || !fds.tod.in_conventional_range()) is.setstate(std::ios::failbit); if (!is.fail()) - tp = round(sys_days(fds.ymd) - *offptr + fds.tod.to_duration()); + tp = round_i(sys_days(fds.ymd) - *offptr + fds.tod.to_duration()); return is; } @@ -7771,13 +8038,14 @@ from_stream(std::basic_istream& is, const CharT* fmt, std::chrono::minutes* offset = nullptr) { using CT = typename std::common_type::type; + using detail::round_i; fields fds{}; fds.has_tod = true; - from_stream(is, fmt, fds, abbrev, offset); + date::from_stream(is, fmt, fds, abbrev, offset); if (!fds.ymd.ok() || !fds.tod.in_conventional_range()) is.setstate(std::ios::failbit); if (!is.fail()) - tp = round(local_seconds{local_days(fds.ymd)} + fds.tod.to_duration()); + tp = round_i(local_seconds{local_days(fds.ymd)} + fds.tod.to_duration()); return is; } @@ -7790,12 +8058,13 @@ from_stream(std::basic_istream& is, const CharT* fmt, { using Duration = std::chrono::duration; using CT = typename std::common_type::type; + using detail::round_i; fields fds{}; - from_stream(is, fmt, fds, abbrev, offset); + date::from_stream(is, fmt, fds, abbrev, offset); if (!fds.has_tod) is.setstate(std::ios::failbit); if (!is.fail()) - d = std::chrono::duration_cast(fds.tod.to_duration()); + d = round_i(fds.tod.to_duration()); return is; } @@ -7818,6 +8087,25 @@ struct parse_manip , offset_(offset) {} +#if HAS_STRING_VIEW + parse_manip(const CharT* format, Parsable& tp, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) + : format_(format) + , tp_(tp) + , abbrev_(abbrev) + , offset_(offset) + {} + + parse_manip(std::basic_string_view format, Parsable& tp, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) + : format_(format) + , tp_(tp) + , abbrev_(abbrev) + , offset_(offset) + {} +#endif // HAS_STRING_VIEW }; template @@ -7825,14 +8113,14 @@ std::basic_istream& operator>>(std::basic_istream& is, const parse_manip& x) { - return from_stream(is, x.format_.c_str(), x.tp_, x.abbrev_, x.offset_); + return date::from_stream(is, x.format_.c_str(), x.tp_, x.abbrev_, x.offset_); } template inline auto parse(const std::basic_string& format, Parsable& tp) - -> decltype(from_stream(std::declval&>(), + -> decltype(date::from_stream(std::declval&>(), format.c_str(), tp), parse_manip{format, tp}) { @@ -7844,7 +8132,7 @@ inline auto parse(const std::basic_string& format, Parsable& tp, std::basic_string& abbrev) - -> decltype(from_stream(std::declval&>(), + -> decltype(date::from_stream(std::declval&>(), format.c_str(), tp, &abbrev), parse_manip{format, tp, &abbrev}) { @@ -7856,7 +8144,7 @@ inline auto parse(const std::basic_string& format, Parsable& tp, std::chrono::minutes& offset) - -> decltype(from_stream(std::declval&>(), + -> decltype(date::from_stream(std::declval&>(), format.c_str(), tp, std::declval*>(), &offset), @@ -7870,7 +8158,7 @@ inline auto parse(const std::basic_string& format, Parsable& tp, std::basic_string& abbrev, std::chrono::minutes& offset) - -> decltype(from_stream(std::declval&>(), + -> decltype(date::from_stream(std::declval&>(), format.c_str(), tp, &abbrev, &offset), parse_manip{format, tp, &abbrev, &offset}) { @@ -7883,7 +8171,7 @@ template inline auto parse(const CharT* format, Parsable& tp) - -> decltype(from_stream(std::declval&>(), format, tp), + -> decltype(date::from_stream(std::declval&>(), format, tp), parse_manip{format, tp}) { return {format, tp}; @@ -7893,7 +8181,7 @@ template inline auto parse(const CharT* format, Parsable& tp, std::basic_string& abbrev) - -> decltype(from_stream(std::declval&>(), format, + -> decltype(date::from_stream(std::declval&>(), format, tp, &abbrev), parse_manip{format, tp, &abbrev}) { @@ -7904,7 +8192,7 @@ template inline auto parse(const CharT* format, Parsable& tp, std::chrono::minutes& offset) - -> decltype(from_stream(std::declval&>(), format, + -> decltype(date::from_stream(std::declval&>(), format, tp, std::declval*>(), &offset), parse_manip{format, tp, nullptr, &offset}) { @@ -7916,7 +8204,7 @@ inline auto parse(const CharT* format, Parsable& tp, std::basic_string& abbrev, std::chrono::minutes& offset) - -> decltype(from_stream(std::declval&>(), format, + -> decltype(date::from_stream(std::declval&>(), format, tp, &abbrev, &offset), parse_manip{format, tp, &abbrev, &offset}) { diff --git a/cpp/src/arrow/vendored/datetime/ios.h b/cpp/src/arrow/vendored/datetime/ios.h index 46567d69b18..acad28d13b5 100644 --- a/cpp/src/arrow/vendored/datetime/ios.h +++ b/cpp/src/arrow/vendored/datetime/ios.h @@ -38,10 +38,10 @@ { namespace iOSUtils { - + std::string get_tzdata_path(); std::string get_current_timezone(); - + } // namespace iOSUtils } // namespace date } // namespace arrow_vendored diff --git a/cpp/src/arrow/vendored/datetime/ios.mm b/cpp/src/arrow/vendored/datetime/ios.mm index 18c521201d3..22b7ce6c30b 100644 --- a/cpp/src/arrow/vendored/datetime/ios.mm +++ b/cpp/src/arrow/vendored/datetime/ios.mm @@ -53,7 +53,7 @@ { namespace iOSUtils { - + struct TarInfo { char objType; @@ -62,14 +62,14 @@ size_t blocksContentSize; // adjusted size to 512 bytes blocks bool success; }; - + std::string convertCFStringRefPathToCStringPath(CFStringRef ref); bool extractTzdata(CFURLRef homeUrl, CFURLRef archiveUrl, std::string destPath); TarInfo getTarObjectInfo(std::ifstream &readStream); std::string getTarObject(std::ifstream &readStream, int64_t size); bool writeFile(const std::string &tzdataPath, const std::string &fileName, const std::string &data, size_t realContentSize); - + std::string get_current_timezone() { @@ -77,18 +77,18 @@ bool writeFile(const std::string &tzdataPath, const std::string &fileName, CFStringRef tzNameRef = CFTimeZoneGetName(tzRef); CFIndex bufferSize = CFStringGetLength(tzNameRef) + 1; char buffer[bufferSize]; - + if (CFStringGetCString(tzNameRef, buffer, bufferSize, kCFStringEncodingUTF8)) { CFRelease(tzRef); return std::string(buffer); } - + CFRelease(tzRef); - + return ""; } - + std::string get_tzdata_path() { @@ -98,7 +98,7 @@ bool writeFile(const std::string &tzdataPath, const std::string &fileName, INTERNAL_DIR + "/" + TZDATA_DIR); std::string result_path(std::string(convertCFStringRefPathToCStringPath(homePath)) + INTERNAL_DIR); - + if (access(path.c_str(), F_OK) == 0) { #if TAR_DEBUG @@ -106,34 +106,34 @@ bool writeFile(const std::string &tzdataPath, const std::string &fileName, #endif CFRelease(homeUrlRef); CFRelease(homePath); - + return result_path; } - + CFBundleRef mainBundle = CFBundleGetMainBundle(); CFArrayRef paths = CFBundleCopyResourceURLsOfType(mainBundle, CFSTR(TARGZ_EXTENSION), NULL); - + if (CFArrayGetCount(paths) != 0) { // get archive path, assume there is no other tar.gz in bundle CFURLRef archiveUrl = static_cast(CFArrayGetValueAtIndex(paths, 0)); CFStringRef archiveName = CFURLCopyPath(archiveUrl); archiveUrl = CFBundleCopyResourceURL(mainBundle, archiveName, NULL, NULL); - + extractTzdata(homeUrlRef, archiveUrl, path); - + CFRelease(archiveUrl); CFRelease(archiveName); } - + CFRelease(homeUrlRef); CFRelease(homePath); CFRelease(paths); - + return result_path; } - + std::string convertCFStringRefPathToCStringPath(CFStringRef ref) { @@ -144,55 +144,55 @@ bool writeFile(const std::string &tzdataPath, const std::string &fileName, delete[] buffer; return result; } - + bool extractTzdata(CFURLRef homeUrl, CFURLRef archiveUrl, std::string destPath) { std::string TAR_TMP_PATH = "/tmp.tar"; - + CFStringRef homeStringRef = CFURLCopyPath(homeUrl); auto homePath = convertCFStringRefPathToCStringPath(homeStringRef); CFRelease(homeStringRef); - + CFStringRef archiveStringRef = CFURLCopyPath(archiveUrl); auto archivePath = convertCFStringRefPathToCStringPath(archiveStringRef); CFRelease(archiveStringRef); - + // create Library path auto libraryPath = homePath + INTERNAL_DIR; - + // create tzdata path auto tzdataPath = libraryPath + "/" + TZDATA_DIR; - + // -- replace %20 with " " const std::string search = "%20"; const std::string replacement = " "; size_t pos = 0; - + while ((pos = archivePath.find(search, pos)) != std::string::npos) { archivePath.replace(pos, search.length(), replacement); pos += replacement.length(); } - + gzFile tarFile = gzopen(archivePath.c_str(), "rb"); - + // create tar unpacking path auto tarPath = libraryPath + TAR_TMP_PATH; - + // create tzdata directory mkdir(destPath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); - + // ======= extract tar ======== - + std::ofstream os(tarPath.c_str(), std::ofstream::out | std::ofstream::app); unsigned int bufferLength = 1024 * 256; // 256Kb unsigned char *buffer = (unsigned char *)malloc(bufferLength); bool success = true; - + while (true) { int readBytes = gzread(tarFile, buffer, bufferLength); - + if (readBytes > 0) { os.write((char *) &buffer[0], readBytes); @@ -216,21 +216,21 @@ bool writeFile(const std::string &tzdataPath, const std::string &fileName, break; } } - + os.close(); free(buffer); gzclose(tarFile); - + if (!success) { remove(tarPath.c_str()); return false; } - + // ======== extract files ========= - + uint64_t location = 0; // Position in the file - + // get file size struct stat stat_buf; int res = stat(tarPath.c_str(), &stat_buf); @@ -241,20 +241,20 @@ bool writeFile(const std::string &tzdataPath, const std::string &fileName, return false; } int64_t tarSize = stat_buf.st_size; - + // create read stream std::ifstream is(tarPath.c_str(), std::ifstream::in | std::ifstream::binary); - + // process files while (location < tarSize) { TarInfo info = getTarObjectInfo(is); - + if (!info.success || info.realContentSize == 0) { break; // something wrong or all files are read } - + switch (info.objType) { case '0': // file @@ -268,17 +268,17 @@ bool writeFile(const std::string &tzdataPath, const std::string &fileName, #endif writeFile(tzdataPath, info.objName, obj, info.realContentSize); location += info.blocksContentSize; - + break; } } } - + remove(tarPath.c_str()); - + return true; } - + TarInfo getTarObjectInfo(std::ifstream &readStream) { @@ -287,22 +287,22 @@ bool writeFile(const std::string &tzdataPath, const std::string &fileName, char type; char name[TAR_NAME_SIZE + 1]; char sizeBuf[TAR_SIZE_SIZE + 1]; - + readStream.read(buffer, length); - + memcpy(&type, &buffer[TAR_TYPE_POSITION], 1); - + memset(&name, '\0', TAR_NAME_SIZE + 1); memcpy(&name, &buffer[TAR_NAME_POSITION], TAR_NAME_SIZE); - + memset(&sizeBuf, '\0', TAR_SIZE_SIZE + 1); memcpy(&sizeBuf, &buffer[TAR_SIZE_POSITION], TAR_SIZE_SIZE); size_t realSize = strtol(sizeBuf, NULL, 8); size_t blocksSize = realSize + (TAR_BLOCK_SIZE - (realSize % TAR_BLOCK_SIZE)); - + return {type, std::string(name), realSize, blocksSize, true}; } - + std::string getTarObject(std::ifstream &readStream, int64_t size) { @@ -310,29 +310,29 @@ bool writeFile(const std::string &tzdataPath, const std::string &fileName, readStream.read(buffer, size); return std::string(buffer); } - + bool writeFile(const std::string &tzdataPath, const std::string &fileName, const std::string &data, size_t realContentSize) { std::ofstream os(tzdataPath + "/" + fileName, std::ofstream::out | std::ofstream::binary); - + if (!os) { return false; } - + // trim empty space char trimmedData[realContentSize + 1]; memset(&trimmedData, '\0', realContentSize); memcpy(&trimmedData, data.c_str(), realContentSize); - + // write os.write(trimmedData, realContentSize); os.close(); - + return true; } - + } // namespace iOSUtils } // namespace date } // namespace arrow_vendored diff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp index 9efc01d8d7c..9047a31c79a 100644 --- a/cpp/src/arrow/vendored/datetime/tz.cpp +++ b/cpp/src/arrow/vendored/datetime/tz.cpp @@ -33,6 +33,7 @@ // NOTE(ARROW): This is required so that symbols are properly exported from the DLL #include "visibility.h" + #ifdef _WIN32 // windows.h will be included directly and indirectly (e.g. by curl). // We need to define these macros to prevent windows.h bringing in @@ -144,7 +145,7 @@ # endif // HAS_REMOTE_API #else // !_WIN32 # include -# if !USE_OS_TZDB +# if !USE_OS_TZDB && !defined(INSTALL) # include # endif # include @@ -425,20 +426,20 @@ tzdb_list::~tzdb_list() } } -tzdb_list::tzdb_list(tzdb_list&& x) noexcept +tzdb_list::tzdb_list(tzdb_list&& x) NOEXCEPT : head_{x.head_.exchange(nullptr)} { } void -tzdb_list::push_front(tzdb* tzdb) noexcept +tzdb_list::push_front(tzdb* tzdb) NOEXCEPT { tzdb->next = head_; head_ = tzdb; } tzdb_list::const_iterator -tzdb_list::erase_after(const_iterator p) noexcept +tzdb_list::erase_after(const_iterator p) NOEXCEPT { auto t = p.p_->next; p.p_->next = p.p_->next->next; @@ -448,7 +449,7 @@ tzdb_list::erase_after(const_iterator p) noexcept struct tzdb_list::undocumented_helper { - static void push_front(tzdb_list& db_list, tzdb* tzdb) noexcept + static void push_front(tzdb_list& db_list, tzdb* tzdb) NOEXCEPT { db_list.push_front(tzdb); } @@ -470,6 +471,32 @@ get_tzdb_list() return tz_db; } +static +std::string +parse3(std::istream& in) +{ + std::string r(3, ' '); + ws(in); + r[0] = static_cast(in.get()); + r[1] = static_cast(in.get()); + r[2] = static_cast(in.get()); + return r; +} + +static +unsigned +parse_month(std::istream& in) +{ + CONSTDATA char*const month_names[] = + {"Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; + auto s = parse3(in); + auto m = std::find(std::begin(month_names), std::end(month_names), s) - month_names; + if (m >= std::end(month_names) - std::begin(month_names)) + throw std::runtime_error("oops: bad month name: " + s); + return static_cast(++m); +} + #if !USE_OS_TZDB #ifdef _WIN32 @@ -684,18 +711,6 @@ load_timezone_mappings_from_xml_file(const std::string& input_path) // Parsing helpers -static -std::string -parse3(std::istream& in) -{ - std::string r(3, ' '); - ws(in); - r[0] = static_cast(in.get()); - r[1] = static_cast(in.get()); - r[2] = static_cast(in.get()); - return r; -} - static unsigned parse_dow(std::istream& in) @@ -709,20 +724,6 @@ parse_dow(std::istream& in) return static_cast(dow); } -static -unsigned -parse_month(std::istream& in) -{ - CONSTDATA char*const month_names[] = - {"Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; - auto s = parse3(in); - auto m = std::find(std::begin(month_names), std::end(month_names), s) - month_names; - if (m >= std::end(month_names) - std::begin(month_names)) - throw std::runtime_error("oops: bad month name: " + s); - return static_cast(++m); -} - static std::chrono::seconds parse_unsigned_time(std::istream& in) @@ -2130,14 +2131,25 @@ time_zone::load_sys_info(std::vector::const_iterator i) cons { using namespace std::chrono; assert(!transitions_.empty()); - assert(i != transitions_.begin()); sys_info r; - r.begin = i[-1].timepoint; - r.end = i != transitions_.end() ? i->timepoint : - sys_seconds(sys_days(year::max()/max_day)); - r.offset = i[-1].info->offset; - r.save = i[-1].info->is_dst ? minutes{1} : minutes{0}; - r.abbrev = i[-1].info->abbrev; + if (i != transitions_.begin()) + { + r.begin = i[-1].timepoint; + r.end = i != transitions_.end() ? i->timepoint : + sys_seconds(sys_days(year::max()/max_day)); + r.offset = i[-1].info->offset; + r.save = i[-1].info->is_dst ? minutes{1} : minutes{0}; + r.abbrev = i[-1].info->abbrev; + } + else + { + r.begin = sys_days(year::min()/min_day); + r.end = i+1 != transitions_.end() ? i[1].timepoint : + sys_seconds(sys_days(year::max()/max_day)); + r.offset = i[0].info->offset; + r.save = i[0].info->is_dst ? minutes{1} : minutes{0}; + r.abbrev = i[0].info->abbrev; + } return r; } @@ -2158,7 +2170,7 @@ time_zone::get_info_impl(local_seconds tp) const { using namespace std::chrono; init(); - local_info i; + local_info i{}; i.result = local_info::unique; auto tr = upper_bound(transitions_.begin(), transitions_.end(), tp, [](const local_seconds& x, const transition& t) @@ -2172,7 +2184,7 @@ time_zone::get_info_impl(local_seconds tp) const { i.second = load_sys_info(--tr); tps = sys_seconds{(tp - i.second.offset).time_since_epoch()}; - if (tps < i.second.end) + if (tps < i.second.end && i.first.end != i.second.end) { i.result = local_info::ambiguous; std::swap(i.first, i.second); @@ -2215,15 +2227,11 @@ operator<<(std::ostream& os, const time_zone& z) return os; } -#if !MISSING_LEAP_SECONDS - leap_second::leap_second(const sys_seconds& s, detail::undocumented) : date_(s) { } -#endif // !MISSING_LEAP_SECONDS - #else // !USE_OS_TZDB time_zone::time_zone(const std::string& s, detail::undocumented) @@ -2620,8 +2628,6 @@ operator<<(std::ostream& os, const time_zone& z) #endif // !USE_OS_TZDB -#if !MISSING_LEAP_SECONDS - std::ostream& operator<<(std::ostream& os, const leap_second& x) { @@ -2629,11 +2635,8 @@ operator<<(std::ostream& os, const leap_second& x) return os << x.date_ << " +"; } -#endif // !MISSING_LEAP_SECONDS - #if USE_OS_TZDB -# ifdef __APPLE__ static std::string get_version() @@ -2642,12 +2645,99 @@ get_version() auto path = get_tz_dir() + string("/+VERSION"); ifstream in{path}; string version; - in >> version; - if (in.fail()) - throw std::runtime_error("Unable to get Timezone database version from " + path); - return version; + if (in) + { + in >> version; + return version; + } + in.clear(); + in.open(get_tz_dir() + std::string(1, folder_delimiter) + "version"); + if (in) + { + in >> version; + return version; + } + return "unknown"; +} + +static +std::vector +find_read_and_leap_seconds() +{ + std::ifstream in(get_tz_dir() + std::string(1, folder_delimiter) + "leapseconds", + std::ios_base::binary); + if (in) + { + std::vector leap_seconds; + std::string line; + while (in) + { + std::getline(in, line); + if (!line.empty() && line[0] != '#') + { + std::istringstream in(line); + in.exceptions(std::ios::failbit | std::ios::badbit); + std::string word; + in >> word; + if (word == "Leap") + { + int y, m, d; + in >> y; + m = static_cast(parse_month(in)); + in >> d; + leap_seconds.push_back(leap_second(sys_days{year{y}/m/d} + days{1}, + detail::undocumented{})); + } + else + { + std::cerr << line << '\n'; + } + } + } + return leap_seconds; + } + in.clear(); + in.open(get_tz_dir() + std::string(1, folder_delimiter) + "leap-seconds.list", + std::ios_base::binary); + if (in) + { + std::vector leap_seconds; + std::string line; + const auto offset = sys_days{1970_y/1/1}-sys_days{1900_y/1/1}; + while (in) + { + std::getline(in, line); + if (!line.empty() && line[0] != '#') + { + std::istringstream in(line); + in.exceptions(std::ios::failbit | std::ios::badbit); + using seconds = std::chrono::seconds; + seconds::rep s; + in >> s; + if (s == 2272060800) + continue; + leap_seconds.push_back(leap_second(sys_seconds{seconds{s}} - offset, + detail::undocumented{})); + } + } + return leap_seconds; + } + in.clear(); + in.open(get_tz_dir() + std::string(1, folder_delimiter) + "right/UTC", + std::ios_base::binary); + if (in) + { + return load_just_leaps(in); + } + in.clear(); + in.open(get_tz_dir() + std::string(1, folder_delimiter) + "UTC", + std::ios_base::binary); + if (in) + { + return load_just_leaps(in); + } + return {}; } -# endif static std::unique_ptr @@ -2676,6 +2766,7 @@ init_tzdb() strcmp(d->d_name, "iso3166.tab") == 0 || strcmp(d->d_name, "right") == 0 || strcmp(d->d_name, "+VERSION") == 0 || + strcmp(d->d_name, "version") == 0 || strcmp(d->d_name, "zone.tab") == 0 || strcmp(d->d_name, "zone1970.tab") == 0 || strcmp(d->d_name, "tzdata.zi") == 0 || @@ -2703,28 +2794,8 @@ init_tzdb() } db->zones.shrink_to_fit(); std::sort(db->zones.begin(), db->zones.end()); -# if !MISSING_LEAP_SECONDS - std::ifstream in(get_tz_dir() + std::string(1, folder_delimiter) + "right/UTC", - std::ios_base::binary); - if (in) - { - in.exceptions(std::ios::failbit | std::ios::badbit); - db->leap_seconds = load_just_leaps(in); - } - else - { - in.clear(); - in.open(get_tz_dir() + std::string(1, folder_delimiter) + - "UTC", std::ios_base::binary); - if (!in) - throw std::runtime_error("Unable to extract leap second information"); - in.exceptions(std::ios::failbit | std::ios::badbit); - db->leap_seconds = load_just_leaps(in); - } -# endif // !MISSING_LEAP_SECONDS -# ifdef __APPLE__ + db->leap_seconds = find_read_and_leap_seconds(); db->version = get_version(); -# endif return db; } @@ -2781,17 +2852,23 @@ file_exists(const std::string& filename) // CURL tools -static -int -curl_global() +namespace { - if (::curl_global_init(CURL_GLOBAL_DEFAULT) != 0) - throw std::runtime_error("CURL global initialization failed"); - return 0; -} -namespace +struct curl_global_init_and_cleanup { + ~curl_global_init_and_cleanup() + { + ::curl_global_cleanup(); + } + curl_global_init_and_cleanup() + { + if (::curl_global_init(CURL_GLOBAL_DEFAULT) != 0) + throw std::runtime_error("CURL global initialization failed"); + } + curl_global_init_and_cleanup(curl_global_init_and_cleanup const&) = delete; + curl_global_init_and_cleanup& operator=(curl_global_init_and_cleanup const&) = delete; +}; struct curl_deleter { @@ -2807,8 +2884,7 @@ static std::unique_ptr curl_init() { - static const auto curl_is_now_initiailized = curl_global(); - (void)curl_is_now_initiailized; + static const curl_global_init_and_cleanup _{}; return std::unique_ptr{::curl_easy_init()}; } @@ -3299,8 +3375,8 @@ remote_download(const std::string& version, char* error_buffer) { auto mapping_file = get_download_mapping_file(version); result = download_to_file( - "https://raw.githubusercontent.com/unicode-org/cldr/master/" - "common/supplemental/windowsZones.xml", + "https://raw.githubusercontent.com/unicode-org/cldr/master/" + "common/supplemental/windowsZones.xml", mapping_file, download_file_options::text, error_buffer); } # endif // _WIN32 @@ -3574,11 +3650,9 @@ operator<<(std::ostream& os, const tzdb& db) os << "Version: " << db.version << "\n\n"; for (const auto& x : db.zones) os << x << '\n'; -#if !MISSING_LEAP_SECONDS os << '\n'; for (const auto& x : db.leap_seconds) os << x << '\n'; -#endif // !MISSING_LEAP_SECONDS return os; } diff --git a/cpp/src/arrow/vendored/datetime/tz.h b/cpp/src/arrow/vendored/datetime/tz.h index 23c6742143c..6d54e49ea22 100644 --- a/cpp/src/arrow/vendored/datetime/tz.h +++ b/cpp/src/arrow/vendored/datetime/tz.h @@ -44,8 +44,7 @@ // Technically any OS may use the mapping process but currently only Windows does use it. // NOTE(ARROW): If this is not set, then the library will attempt to -// use libcurl to obtain a timezone database, and we do not yet have -// curl in our build toolchain +// use libcurl to obtain a timezone database, and we probably do not want this. #ifndef _WIN32 #define USE_OS_TZDB 1 #endif @@ -93,15 +92,6 @@ static_assert(HAS_REMOTE_API == 0 ? AUTO_DOWNLOAD == 0 : true, # ifdef _WIN32 # error "USE_OS_TZDB can not be used on Windows" # endif -# ifndef MISSING_LEAP_SECONDS -# ifdef __APPLE__ -# define MISSING_LEAP_SECONDS 1 -# else -# define MISSING_LEAP_SECONDS 0 -# endif -# endif -#else -# define MISSING_LEAP_SECONDS 0 #endif #ifndef HAS_DEDUCTION_GUIDES @@ -712,6 +702,11 @@ class zoned_time private: template friend class zoned_time; + + template + static + TimeZonePtr2&& + check(TimeZonePtr2&& p); }; using zoned_seconds = zoned_time; @@ -999,8 +994,6 @@ inline bool operator>=(const time_zone_link& x, const time_zone_link& y) {return #endif // !USE_OS_TZDB -#if !MISSING_LEAP_SECONDS - class leap_second { private: @@ -1124,8 +1117,6 @@ operator>=(const sys_time& x, const leap_second& y) using leap = leap_second; -#endif // !MISSING_LEAP_SECONDS - #ifdef _WIN32 namespace detail @@ -1171,9 +1162,7 @@ struct tzdb #if !USE_OS_TZDB std::vector links; #endif -#if !MISSING_LEAP_SECONDS std::vector leap_seconds; -#endif #if !USE_OS_TZDB std::vector rules; #endif @@ -1230,31 +1219,31 @@ class tzdb_list public: ~tzdb_list(); tzdb_list() = default; - tzdb_list(tzdb_list&& x) noexcept; + tzdb_list(tzdb_list&& x) NOEXCEPT; - const tzdb& front() const noexcept {return *head_;} - tzdb& front() noexcept {return *head_;} + const tzdb& front() const NOEXCEPT {return *head_;} + tzdb& front() NOEXCEPT {return *head_;} class const_iterator; - const_iterator begin() const noexcept; - const_iterator end() const noexcept; + const_iterator begin() const NOEXCEPT; + const_iterator end() const NOEXCEPT; - const_iterator cbegin() const noexcept; - const_iterator cend() const noexcept; + const_iterator cbegin() const NOEXCEPT; + const_iterator cend() const NOEXCEPT; - const_iterator erase_after(const_iterator p) noexcept; + const_iterator erase_after(const_iterator p) NOEXCEPT; struct undocumented_helper; private: - void push_front(tzdb* tzdb) noexcept; + void push_front(tzdb* tzdb) NOEXCEPT; }; class tzdb_list::const_iterator { tzdb* p_ = nullptr; - explicit const_iterator(tzdb* p) noexcept : p_{p} {} + explicit const_iterator(tzdb* p) NOEXCEPT : p_{p} {} public: const_iterator() = default; @@ -1264,20 +1253,20 @@ class tzdb_list::const_iterator using pointer = const value_type*; using difference_type = std::ptrdiff_t; - reference operator*() const noexcept {return *p_;} - pointer operator->() const noexcept {return p_;} + reference operator*() const NOEXCEPT {return *p_;} + pointer operator->() const NOEXCEPT {return p_;} - const_iterator& operator++() noexcept {p_ = p_->next; return *this;} - const_iterator operator++(int) noexcept {auto t = *this; ++(*this); return t;} + const_iterator& operator++() NOEXCEPT {p_ = p_->next; return *this;} + const_iterator operator++(int) NOEXCEPT {auto t = *this; ++(*this); return t;} friend bool - operator==(const const_iterator& x, const const_iterator& y) noexcept + operator==(const const_iterator& x, const const_iterator& y) NOEXCEPT {return x.p_ == y.p_;} friend bool - operator!=(const const_iterator& x, const const_iterator& y) noexcept + operator!=(const const_iterator& x, const const_iterator& y) NOEXCEPT {return !(x == y);} friend class tzdb_list; @@ -1285,28 +1274,28 @@ class tzdb_list::const_iterator inline tzdb_list::const_iterator -tzdb_list::begin() const noexcept +tzdb_list::begin() const NOEXCEPT { return const_iterator{head_}; } inline tzdb_list::const_iterator -tzdb_list::end() const noexcept +tzdb_list::end() const NOEXCEPT { return const_iterator{nullptr}; } inline tzdb_list::const_iterator -tzdb_list::cbegin() const noexcept +tzdb_list::cbegin() const NOEXCEPT { return begin(); } inline tzdb_list::const_iterator -tzdb_list::cend() const noexcept +tzdb_list::cend() const NOEXCEPT { return end(); } @@ -1337,7 +1326,7 @@ namespace detail template inline T* -to_raw_pointer(T* p) noexcept +to_raw_pointer(T* p) NOEXCEPT { return p; } @@ -1345,7 +1334,7 @@ to_raw_pointer(T* p) noexcept template inline auto -to_raw_pointer(Pointer p) noexcept +to_raw_pointer(Pointer p) NOEXCEPT -> decltype(detail::to_raw_pointer(p.operator->())) { return detail::to_raw_pointer(p.operator->()); @@ -1353,13 +1342,25 @@ to_raw_pointer(Pointer p) noexcept } // namespace detail +template +template +inline +TimeZonePtr2&& +zoned_time::check(TimeZonePtr2&& p) +{ + if (detail::to_raw_pointer(p) == nullptr) + throw std::runtime_error( + "zoned_time constructed with a time zone pointer == nullptr"); + return std::forward(p); +} + template #if !defined(_MSC_VER) || (_MSC_VER > 1916) template #endif inline zoned_time::zoned_time() - : zone_(zoned_traits::default_zone()) + : zone_(check(zoned_traits::default_zone())) {} template @@ -1368,15 +1369,15 @@ template #endif inline zoned_time::zoned_time(const sys_time& st) - : zone_(zoned_traits::default_zone()) + : zone_(check(zoned_traits::default_zone())) , tp_(st) {} template inline zoned_time::zoned_time(TimeZonePtr z) - : zone_(std::move(z)) - {assert(detail::to_raw_pointer(zone_) != nullptr);} + : zone_(check(std::move(z))) + {} #if HAS_STRING_VIEW @@ -1411,7 +1412,7 @@ zoned_time::zoned_time(const zoned_time inline zoned_time::zoned_time(TimeZonePtr z, const sys_time& st) - : zone_(std::move(z)) + : zone_(check(std::move(z))) , tp_(st) {} @@ -1421,7 +1422,7 @@ template #endif inline zoned_time::zoned_time(TimeZonePtr z, const local_time& t) - : zone_(std::move(z)) + : zone_(check(std::move(z))) , tp_(zone_->to_sys(t)) {} @@ -1432,7 +1433,7 @@ template inline zoned_time::zoned_time(TimeZonePtr z, const local_time& t, choose c) - : zone_(std::move(z)) + : zone_(check(std::move(z))) , tp_(zone_->to_sys(t, c)) {} @@ -1441,7 +1442,7 @@ template inline zoned_time::zoned_time(TimeZonePtr z, const zoned_time& zt) - : zone_(std::move(z)) + : zone_(check(std::move(z))) , tp_(zt.tp_) {} @@ -1856,8 +1857,6 @@ operator<<(std::basic_ostream& os, const zoned_time& t) return gps_clock::from_utc(tai_clock::to_utc(t)); } -#endif // !MISSING_LEAP_SECONDS - } // namespace date } // namespace arrow_vendored diff --git a/cpp/src/arrow/vendored/datetime/tz_private.h b/cpp/src/arrow/vendored/datetime/tz_private.h index 282842e7441..6b7a91493e1 100644 --- a/cpp/src/arrow/vendored/datetime/tz_private.h +++ b/cpp/src/arrow/vendored/datetime/tz_private.h @@ -97,9 +97,9 @@ class MonthDayTime U& operator=(const pair& x); } u; - std::chrono::hours h_{}; - std::chrono::minutes m_{}; - std::chrono::seconds s_{}; + std::chrono::hours h_{0}; + std::chrono::minutes m_{0}; + std::chrono::seconds s_{0}; tz zone_{tz::local}; public: @@ -247,7 +247,7 @@ struct zonelet sys_seconds until_utc_; local_seconds until_std_; local_seconds until_loc_; - std::chrono::minutes initial_save_{}; + std::chrono::minutes initial_save_{0}; std::string initial_abbrev_; std::pair first_rule_{nullptr, date::year::min()}; std::pair last_rule_{nullptr, date::year::max()};