From c31c36e121b63e0c796d953713983037746e4e81 Mon Sep 17 00:00:00 2001 From: Rok Date: Wed, 9 Feb 2022 03:55:40 +0100 Subject: [PATCH 1/3] Added Epiyear --- cpp/src/arrow/compute/api_scalar.cc | 1 + cpp/src/arrow/compute/api_scalar.h | 14 +++++++ .../compute/kernels/scalar_temporal_test.cc | 16 +++++++ .../compute/kernels/scalar_temporal_unary.cc | 42 +++++++++++++++++++ docs/source/cpp/compute.rst | 2 + docs/source/python/api/compute.rst | 1 + 6 files changed, 76 insertions(+) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index cead4ec4f61..36277ae2264 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -774,6 +774,7 @@ SCALAR_EAGER_UNARY(DayOfYear, "day_of_year") SCALAR_EAGER_UNARY(Hour, "hour") SCALAR_EAGER_UNARY(YearMonthDay, "year_month_day") SCALAR_EAGER_UNARY(IsDaylightSavings, "is_dst") +SCALAR_EAGER_UNARY(Epiyear, "epiyear") SCALAR_EAGER_UNARY(ISOCalendar, "iso_calendar") SCALAR_EAGER_UNARY(ISOWeek, "iso_week") SCALAR_EAGER_UNARY(ISOYear, "iso_year") diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index c13b45b2798..6272c3086a3 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -1199,6 +1199,20 @@ ARROW_EXPORT Result DayOfYear(const Datum& values, ExecContext* ctx = NUL ARROW_EXPORT Result ISOYear(const Datum& values, ExecContext* ctx = NULLPTR); +/// \brief Epiyear returns epidemiological year number for each element of `values`. +/// First week of an epidemiological year has the majority (4 or more) of it's +/// days in January. Last week of an epidemiological year has the year's last +/// Wednesday in it. Epidemiological week starts on Sunday. +/// +/// \param[in] values input to extract epidemiological year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 8.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Epiyear(const Datum& values, ExecContext* ctx = NULLPTR); + /// \brief ISOWeek returns ISO week of year number for each element of `values`. /// First ISO week has the majority (4 or more) of its days in January. /// ISO week starts on Monday. Year can have 52 or 53 weeks. diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 0ab74453610..912d5d8dd61 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -166,6 +166,9 @@ class ScalarTemporalTest : public ::testing::Test { std::string day_of_week = "[3, 1, 6, 2, 2, 1, 0, 3, 4, 6, 0, 6, 5, 6, 0, 6, null]"; std::string day_of_year = "[1, 60, 1, 138, 1, 365, 364, 365, 1, 3, 4, 1, 365, 363, 364, 1, null]"; + std::string epiyear = + "[1969, 2000, 1899, 2033, 2020, 2020, 2020, 2009, 2009, 2010, 2010, 2006, 2005, " + "2008, 2008, 2012, null]"; std::string iso_year = "[1970, 2000, 1898, 2033, 2020, 2020, 2020, 2009, 2009, 2009, 2010, 2005, " "2005, 2008, 2009, 2011, null]"; @@ -411,6 +414,7 @@ TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionAllTemporalTypes) { CheckScalarUnary("year_month_day", ArrayFromJSON(unit, sample), year_month_day); CheckScalarUnary("day_of_week", unit, sample, int64(), day_of_week); CheckScalarUnary("day_of_year", unit, sample, int64(), day_of_year); + CheckScalarUnary("epiyear", unit, sample, int64(), epiyear); CheckScalarUnary("iso_year", unit, sample, int64(), iso_year); CheckScalarUnary("iso_week", unit, sample, int64(), iso_week); CheckScalarUnary("us_week", unit, sample, int64(), us_week); @@ -477,6 +481,7 @@ TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionWithDifferentUnits) { CheckScalarUnary("day_of_year", unit, times_seconds_precision, int64(), day_of_year); ASSERT_RAISES(Invalid, IsDaylightSavings(ArrayFromJSON(unit, times_seconds_precision))); + CheckScalarUnary("epiyear", unit, times_seconds_precision, int64(), epiyear); CheckScalarUnary("iso_year", unit, times_seconds_precision, int64(), iso_year); CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), iso_week); CheckScalarUnary("us_week", unit, times_seconds_precision, int64(), us_week); @@ -505,6 +510,7 @@ TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) { {"year": 2262, "month": 4, "day": 13}])"); auto day_of_week = "[0, 6]"; auto day_of_year = "[263, 103]"; + auto epiyear = "[1677, 2262]"; auto iso_year = "[1677, 2262]"; auto iso_week = "[38, 15]"; auto us_week = "[38, 16]"; @@ -528,6 +534,7 @@ TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) { CheckScalarUnary("year_month_day", ArrayFromJSON(unit, times), year_month_day); CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week); CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year); + CheckScalarUnary("epiyear", unit, times, int64(), epiyear); CheckScalarUnary("iso_year", unit, times, int64(), iso_year); CheckScalarUnary("iso_week", unit, times, int64(), iso_week); CheckScalarUnary("us_week", unit, times, int64(), us_week); @@ -575,6 +582,9 @@ TEST_F(ScalarTemporalTest, TestZoned1) { std::string is_dst = "[false, false, false, false, false, false, false, false, false, false, false, " "false, false, false, false, false, null]"; + auto epiyear = + "[1969, 2000, 1898, 2033, 2020, 2020, 2020, 2009, 2009, 2009, 2010, 2005, 2005, " + "2008, 2008, 2011, null]"; auto iso_year = "[1970, 2000, 1898, 2033, 2020, 2020, 2019, 2009, 2009, 2009, 2009, 2005, 2005, " "2008, 2008, 2011, null]"; @@ -610,6 +620,7 @@ TEST_F(ScalarTemporalTest, TestZoned1) { CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week); CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year); CheckScalarUnary("is_dst", unit, times, boolean(), is_dst); + CheckScalarUnary("epiyear", unit, times, int64(), epiyear); CheckScalarUnary("iso_year", unit, times, int64(), iso_year); CheckScalarUnary("iso_week", unit, times, int64(), iso_week); CheckScalarUnary("us_week", unit, times, int64(), us_week); @@ -653,6 +664,9 @@ TEST_F(ScalarTemporalTest, TestZoned2) { std::string is_dst = "[false, true, false, false, true, true, true, true, true, true, true, true, " "true, true, true, true, null]"; + auto epiyear = + "[1969, 2000, 1899, 2033, 2020, 2020, 2020, 2009, 2009, 2010, 2010, 2006, 2005, " + "2008, 2008, 2012, null]"; auto iso_year = "[1970, 2000, 1898, 2033, 2020, 2020, 2020, 2009, 2009, 2009, 2010, 2005, 2005, " "2008, 2009, 2011, null]"; @@ -689,6 +703,7 @@ TEST_F(ScalarTemporalTest, TestZoned2) { CheckScalarUnary("day_of_week", unit, times_seconds_precision, int64(), day_of_week); CheckScalarUnary("day_of_year", unit, times_seconds_precision, int64(), day_of_year); CheckScalarUnary("is_dst", unit, times_seconds_precision, boolean(), is_dst); + CheckScalarUnary("epiyear", unit, times_seconds_precision, int64(), epiyear); CheckScalarUnary("iso_year", unit, times_seconds_precision, int64(), iso_year); CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), iso_week); CheckScalarUnary("us_week", unit, times_seconds_precision, int64(), us_week); @@ -721,6 +736,7 @@ TEST_F(ScalarTemporalTest, TestNonexistentTimezone) { ASSERT_RAISES(Invalid, DayOfWeek(timestamp_array)); ASSERT_RAISES(Invalid, DayOfYear(timestamp_array)); ASSERT_RAISES(Invalid, IsDaylightSavings(timestamp_array)); + ASSERT_RAISES(Invalid, Epiyear(timestamp_array)); ASSERT_RAISES(Invalid, ISOYear(timestamp_array)); ASSERT_RAISES(Invalid, Week(timestamp_array)); ASSERT_RAISES(Invalid, ISOCalendar(timestamp_array)); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index 8bf82cc037a..bf82f7f1cff 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -429,6 +429,32 @@ struct ISOYear { Localizer localizer_; }; +// ---------------------------------------------------------------------- +// Extract epidemiological year values from temporal types +// +// First week of an epidemiological year has the majority (4 or more) of it's +// days in January. Last week of an epidemiological year has the year's last +// Wednesday in it. Epidemiological week starts on Sunday. + +template +struct Epiyear { + explicit Epiyear(const FunctionOptions* options, Localizer&& localizer) + : localizer_(std::move(localizer)) {} + + template + T Call(KernelContext*, Arg0 arg, Status*) const { + const auto t = floor(localizer_.template ConvertTimePoint(arg)); + auto y = year_month_day{t + days{3}}.year(); + auto start = localizer_.ConvertDays((y - years{1}) / dec / wed[last]) + (mon - thu); + if (t < start) { + --y; + } + return static_cast(static_cast(y)); + } + + Localizer localizer_; +}; + // ---------------------------------------------------------------------- // Extract week from temporal types // @@ -1351,6 +1377,16 @@ const FunctionDoc iso_year_doc{ "cannot be found in the timezone database."), {"values"}}; +const FunctionDoc epiyear_doc{ + "Extract epidemiological year number", + ("First week of an epidemiological year has the majority (4 or more) of it's\n" + "days in January. Last week of an epidemiological year has the year's last\n" + "Wednesday in it. Epidemiological week starts on Sunday." + "Null values emit null.\n" + "An error is returned if the values have a defined timezone but it\n" + "cannot be found in the timezone database."), + {"values"}}; + const FunctionDoc iso_week_doc{ "Extract ISO week of year number", ("First ISO week has the majority (4 or more) of its days in January.\n" @@ -1557,6 +1593,12 @@ void RegisterScalarTemporalUnary(FunctionRegistry* registry) { &iso_year_doc); DCHECK_OK(registry->AddFunction(std::move(iso_year))); + auto epiyear = + UnaryTemporalFactory::Make("epiyear", int64(), + &epiyear_doc); + DCHECK_OK(registry->AddFunction(std::move(epiyear))); + static const auto default_iso_week_options = WeekOptions::ISODefaults(); auto iso_week = UnaryTemporalFactory::Make< diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 535d31b26c2..ea87da443d5 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1380,6 +1380,8 @@ For timestamps inputs with non-empty timezone, localized timestamp components wi +--------------------+------------+-------------------+---------------+----------------------------+-------+ | day_of_year | Unary | Temporal | Int64 | | | +--------------------+------------+-------------------+---------------+----------------------------+-------+ +| epiyear | Unary | Temporal | Int64 | | | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ | hour | Unary | Timestamp, Time | Int64 | | | +--------------------+------------+-------------------+---------------+----------------------------+-------+ | is_dst | Unary | Timestamp | Boolean | | | diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index b6ba414c5bb..9aefd2ee4c0 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -391,6 +391,7 @@ Temporal Component Extraction day day_of_week day_of_year + epiyear hour iso_week iso_year From a5ec85760dfb4e36e4ce65897c63cdd55920f08d Mon Sep 17 00:00:00 2001 From: Rok Date: Wed, 9 Feb 2022 22:36:04 +0100 Subject: [PATCH 2/3] Epiyear -> USEpiyear --- cpp/src/arrow/compute/api_scalar.cc | 2 +- cpp/src/arrow/compute/api_scalar.h | 13 ++++---- .../compute/kernels/scalar_temporal_test.cc | 20 +++++------ .../compute/kernels/scalar_temporal_unary.cc | 33 ++++++++++--------- docs/source/cpp/compute.rst | 2 +- docs/source/python/api/compute.rst | 2 +- 6 files changed, 37 insertions(+), 35 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 36277ae2264..4c15218eec2 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -774,7 +774,6 @@ SCALAR_EAGER_UNARY(DayOfYear, "day_of_year") SCALAR_EAGER_UNARY(Hour, "hour") SCALAR_EAGER_UNARY(YearMonthDay, "year_month_day") SCALAR_EAGER_UNARY(IsDaylightSavings, "is_dst") -SCALAR_EAGER_UNARY(Epiyear, "epiyear") SCALAR_EAGER_UNARY(ISOCalendar, "iso_calendar") SCALAR_EAGER_UNARY(ISOWeek, "iso_week") SCALAR_EAGER_UNARY(ISOYear, "iso_year") @@ -786,6 +785,7 @@ SCALAR_EAGER_UNARY(Nanosecond, "nanosecond") SCALAR_EAGER_UNARY(Quarter, "quarter") SCALAR_EAGER_UNARY(Second, "second") SCALAR_EAGER_UNARY(Subsecond, "subsecond") +SCALAR_EAGER_UNARY(USEpiyear, "us_epiyear") SCALAR_EAGER_UNARY(USWeek, "us_week") SCALAR_EAGER_UNARY(Year, "year") diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 6272c3086a3..4176798210b 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -1199,19 +1199,20 @@ ARROW_EXPORT Result DayOfYear(const Datum& values, ExecContext* ctx = NUL ARROW_EXPORT Result ISOYear(const Datum& values, ExecContext* ctx = NULLPTR); -/// \brief Epiyear returns epidemiological year number for each element of `values`. -/// First week of an epidemiological year has the majority (4 or more) of it's -/// days in January. Last week of an epidemiological year has the year's last -/// Wednesday in it. Epidemiological week starts on Sunday. +/// \brief USEpiyear returns US epidemiological year number for each element of +/// `values`. +/// First week of US epidemiological year has the majority (4 or more) of it's +/// days in January. Last week of US epidemiological year has the year's last +/// Wednesday in it. US epidemiological week starts on Sunday. /// -/// \param[in] values input to extract epidemiological year from +/// \param[in] values input to extract US epidemiological year from /// \param[in] ctx the function execution context, optional /// \return the resulting datum /// /// \since 8.0.0 /// \note API not yet finalized ARROW_EXPORT -Result Epiyear(const Datum& values, ExecContext* ctx = NULLPTR); +Result USEpiyear(const Datum& values, ExecContext* ctx = NULLPTR); /// \brief ISOWeek returns ISO week of year number for each element of `values`. /// First ISO week has the majority (4 or more) of its days in January. diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 912d5d8dd61..372a3cf3e1b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -166,7 +166,7 @@ class ScalarTemporalTest : public ::testing::Test { std::string day_of_week = "[3, 1, 6, 2, 2, 1, 0, 3, 4, 6, 0, 6, 5, 6, 0, 6, null]"; std::string day_of_year = "[1, 60, 1, 138, 1, 365, 364, 365, 1, 3, 4, 1, 365, 363, 364, 1, null]"; - std::string epiyear = + std::string us_epiyear = "[1969, 2000, 1899, 2033, 2020, 2020, 2020, 2009, 2009, 2010, 2010, 2006, 2005, " "2008, 2008, 2012, null]"; std::string iso_year = @@ -414,7 +414,7 @@ TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionAllTemporalTypes) { CheckScalarUnary("year_month_day", ArrayFromJSON(unit, sample), year_month_day); CheckScalarUnary("day_of_week", unit, sample, int64(), day_of_week); CheckScalarUnary("day_of_year", unit, sample, int64(), day_of_year); - CheckScalarUnary("epiyear", unit, sample, int64(), epiyear); + CheckScalarUnary("us_epiyear", unit, sample, int64(), us_epiyear); CheckScalarUnary("iso_year", unit, sample, int64(), iso_year); CheckScalarUnary("iso_week", unit, sample, int64(), iso_week); CheckScalarUnary("us_week", unit, sample, int64(), us_week); @@ -481,7 +481,7 @@ TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionWithDifferentUnits) { CheckScalarUnary("day_of_year", unit, times_seconds_precision, int64(), day_of_year); ASSERT_RAISES(Invalid, IsDaylightSavings(ArrayFromJSON(unit, times_seconds_precision))); - CheckScalarUnary("epiyear", unit, times_seconds_precision, int64(), epiyear); + CheckScalarUnary("us_epiyear", unit, times_seconds_precision, int64(), us_epiyear); CheckScalarUnary("iso_year", unit, times_seconds_precision, int64(), iso_year); CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), iso_week); CheckScalarUnary("us_week", unit, times_seconds_precision, int64(), us_week); @@ -510,7 +510,7 @@ TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) { {"year": 2262, "month": 4, "day": 13}])"); auto day_of_week = "[0, 6]"; auto day_of_year = "[263, 103]"; - auto epiyear = "[1677, 2262]"; + auto us_epiyear = "[1677, 2262]"; auto iso_year = "[1677, 2262]"; auto iso_week = "[38, 15]"; auto us_week = "[38, 16]"; @@ -534,7 +534,7 @@ TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) { CheckScalarUnary("year_month_day", ArrayFromJSON(unit, times), year_month_day); CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week); CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year); - CheckScalarUnary("epiyear", unit, times, int64(), epiyear); + CheckScalarUnary("us_epiyear", unit, times, int64(), us_epiyear); CheckScalarUnary("iso_year", unit, times, int64(), iso_year); CheckScalarUnary("iso_week", unit, times, int64(), iso_week); CheckScalarUnary("us_week", unit, times, int64(), us_week); @@ -582,7 +582,7 @@ TEST_F(ScalarTemporalTest, TestZoned1) { std::string is_dst = "[false, false, false, false, false, false, false, false, false, false, false, " "false, false, false, false, false, null]"; - auto epiyear = + auto us_epiyear = "[1969, 2000, 1898, 2033, 2020, 2020, 2020, 2009, 2009, 2009, 2010, 2005, 2005, " "2008, 2008, 2011, null]"; auto iso_year = @@ -620,7 +620,7 @@ TEST_F(ScalarTemporalTest, TestZoned1) { CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week); CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year); CheckScalarUnary("is_dst", unit, times, boolean(), is_dst); - CheckScalarUnary("epiyear", unit, times, int64(), epiyear); + CheckScalarUnary("us_epiyear", unit, times, int64(), us_epiyear); CheckScalarUnary("iso_year", unit, times, int64(), iso_year); CheckScalarUnary("iso_week", unit, times, int64(), iso_week); CheckScalarUnary("us_week", unit, times, int64(), us_week); @@ -664,7 +664,7 @@ TEST_F(ScalarTemporalTest, TestZoned2) { std::string is_dst = "[false, true, false, false, true, true, true, true, true, true, true, true, " "true, true, true, true, null]"; - auto epiyear = + auto us_epiyear = "[1969, 2000, 1899, 2033, 2020, 2020, 2020, 2009, 2009, 2010, 2010, 2006, 2005, " "2008, 2008, 2012, null]"; auto iso_year = @@ -703,7 +703,7 @@ TEST_F(ScalarTemporalTest, TestZoned2) { CheckScalarUnary("day_of_week", unit, times_seconds_precision, int64(), day_of_week); CheckScalarUnary("day_of_year", unit, times_seconds_precision, int64(), day_of_year); CheckScalarUnary("is_dst", unit, times_seconds_precision, boolean(), is_dst); - CheckScalarUnary("epiyear", unit, times_seconds_precision, int64(), epiyear); + CheckScalarUnary("us_epiyear", unit, times_seconds_precision, int64(), us_epiyear); CheckScalarUnary("iso_year", unit, times_seconds_precision, int64(), iso_year); CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), iso_week); CheckScalarUnary("us_week", unit, times_seconds_precision, int64(), us_week); @@ -736,7 +736,7 @@ TEST_F(ScalarTemporalTest, TestNonexistentTimezone) { ASSERT_RAISES(Invalid, DayOfWeek(timestamp_array)); ASSERT_RAISES(Invalid, DayOfYear(timestamp_array)); ASSERT_RAISES(Invalid, IsDaylightSavings(timestamp_array)); - ASSERT_RAISES(Invalid, Epiyear(timestamp_array)); + ASSERT_RAISES(Invalid, USEpiyear(timestamp_array)); ASSERT_RAISES(Invalid, ISOYear(timestamp_array)); ASSERT_RAISES(Invalid, Week(timestamp_array)); ASSERT_RAISES(Invalid, ISOCalendar(timestamp_array)); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index bf82f7f1cff..c5c036ee85d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -430,15 +430,15 @@ struct ISOYear { }; // ---------------------------------------------------------------------- -// Extract epidemiological year values from temporal types +// Extract US epidemiological year values from temporal types // -// First week of an epidemiological year has the majority (4 or more) of it's -// days in January. Last week of an epidemiological year has the year's last -// Wednesday in it. Epidemiological week starts on Sunday. +// First week of US epidemiological year has the majority (4 or more) of it's +// days in January. Last week of US epidemiological year has the year's last +// Wednesday in it. US epidemiological week starts on Sunday. template -struct Epiyear { - explicit Epiyear(const FunctionOptions* options, Localizer&& localizer) +struct USEpiyear { + explicit USEpiyear(const FunctionOptions* options, Localizer&& localizer) : localizer_(std::move(localizer)) {} template @@ -1377,11 +1377,11 @@ const FunctionDoc iso_year_doc{ "cannot be found in the timezone database."), {"values"}}; -const FunctionDoc epiyear_doc{ - "Extract epidemiological year number", - ("First week of an epidemiological year has the majority (4 or more) of it's\n" - "days in January. Last week of an epidemiological year has the year's last\n" - "Wednesday in it. Epidemiological week starts on Sunday." +const FunctionDoc us_epiyear_doc{ + "Extract US epidemiological year number", + ("First week of US epidemiological year has the majority (4 or more) of\n" + "it's days in January. Last week of US epidemiological year has the\n" + "year's last Wednesday in it. US epidemiological week starts on Sunday.\n" "Null values emit null.\n" "An error is returned if the values have a defined timezone but it\n" "cannot be found in the timezone database."), @@ -1593,11 +1593,12 @@ void RegisterScalarTemporalUnary(FunctionRegistry* registry) { &iso_year_doc); DCHECK_OK(registry->AddFunction(std::move(iso_year))); - auto epiyear = - UnaryTemporalFactory::Make("epiyear", int64(), - &epiyear_doc); - DCHECK_OK(registry->AddFunction(std::move(epiyear))); + auto us_epiyear = + UnaryTemporalFactory::Make("us_epiyear", + int64(), + &us_epiyear_doc); + DCHECK_OK(registry->AddFunction(std::move(us_epiyear))); static const auto default_iso_week_options = WeekOptions::ISODefaults(); auto iso_week = diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index ea87da443d5..e8cb4bace9e 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1380,7 +1380,7 @@ For timestamps inputs with non-empty timezone, localized timestamp components wi +--------------------+------------+-------------------+---------------+----------------------------+-------+ | day_of_year | Unary | Temporal | Int64 | | | +--------------------+------------+-------------------+---------------+----------------------------+-------+ -| epiyear | Unary | Temporal | Int64 | | | +| us_epiyear | Unary | Temporal | Int64 | | | +--------------------+------------+-------------------+---------------+----------------------------+-------+ | hour | Unary | Timestamp, Time | Int64 | | | +--------------------+------------+-------------------+---------------+----------------------------+-------+ diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 9aefd2ee4c0..05972fbd402 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -391,7 +391,6 @@ Temporal Component Extraction day day_of_week day_of_year - epiyear hour iso_week iso_year @@ -404,6 +403,7 @@ Temporal Component Extraction quarter second subsecond + us_epiyear us_week week year From 35d00d0aa037c6215cac9ed9331917e56bc06193 Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 10 Feb 2022 15:36:32 +0100 Subject: [PATCH 3/3] USEpiyear -> USYear --- cpp/src/arrow/compute/api_scalar.cc | 2 +- cpp/src/arrow/compute/api_scalar.h | 5 ++--- .../compute/kernels/scalar_temporal_test.cc | 20 +++++++++---------- .../compute/kernels/scalar_temporal_unary.cc | 17 ++++++++-------- docs/source/cpp/compute.rst | 4 ++-- docs/source/python/api/compute.rst | 2 +- 6 files changed, 24 insertions(+), 26 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 4c15218eec2..4363fe5593c 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -785,8 +785,8 @@ SCALAR_EAGER_UNARY(Nanosecond, "nanosecond") SCALAR_EAGER_UNARY(Quarter, "quarter") SCALAR_EAGER_UNARY(Second, "second") SCALAR_EAGER_UNARY(Subsecond, "subsecond") -SCALAR_EAGER_UNARY(USEpiyear, "us_epiyear") SCALAR_EAGER_UNARY(USWeek, "us_week") +SCALAR_EAGER_UNARY(USYear, "us_year") SCALAR_EAGER_UNARY(Year, "year") Result AssumeTimezone(const Datum& arg, AssumeTimezoneOptions options, diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 4176798210b..3b3b0115c8e 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -1199,8 +1199,7 @@ ARROW_EXPORT Result DayOfYear(const Datum& values, ExecContext* ctx = NUL ARROW_EXPORT Result ISOYear(const Datum& values, ExecContext* ctx = NULLPTR); -/// \brief USEpiyear returns US epidemiological year number for each element of -/// `values`. +/// \brief USYear returns US epidemiological year number for each element of `values`. /// First week of US epidemiological year has the majority (4 or more) of it's /// days in January. Last week of US epidemiological year has the year's last /// Wednesday in it. US epidemiological week starts on Sunday. @@ -1212,7 +1211,7 @@ Result ISOYear(const Datum& values, ExecContext* ctx = NULLPTR); /// \since 8.0.0 /// \note API not yet finalized ARROW_EXPORT -Result USEpiyear(const Datum& values, ExecContext* ctx = NULLPTR); +Result USYear(const Datum& values, ExecContext* ctx = NULLPTR); /// \brief ISOWeek returns ISO week of year number for each element of `values`. /// First ISO week has the majority (4 or more) of its days in January. diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 372a3cf3e1b..63260c15984 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -166,7 +166,7 @@ class ScalarTemporalTest : public ::testing::Test { std::string day_of_week = "[3, 1, 6, 2, 2, 1, 0, 3, 4, 6, 0, 6, 5, 6, 0, 6, null]"; std::string day_of_year = "[1, 60, 1, 138, 1, 365, 364, 365, 1, 3, 4, 1, 365, 363, 364, 1, null]"; - std::string us_epiyear = + std::string us_year = "[1969, 2000, 1899, 2033, 2020, 2020, 2020, 2009, 2009, 2010, 2010, 2006, 2005, " "2008, 2008, 2012, null]"; std::string iso_year = @@ -414,7 +414,7 @@ TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionAllTemporalTypes) { CheckScalarUnary("year_month_day", ArrayFromJSON(unit, sample), year_month_day); CheckScalarUnary("day_of_week", unit, sample, int64(), day_of_week); CheckScalarUnary("day_of_year", unit, sample, int64(), day_of_year); - CheckScalarUnary("us_epiyear", unit, sample, int64(), us_epiyear); + CheckScalarUnary("us_year", unit, sample, int64(), us_year); CheckScalarUnary("iso_year", unit, sample, int64(), iso_year); CheckScalarUnary("iso_week", unit, sample, int64(), iso_week); CheckScalarUnary("us_week", unit, sample, int64(), us_week); @@ -481,7 +481,7 @@ TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionWithDifferentUnits) { CheckScalarUnary("day_of_year", unit, times_seconds_precision, int64(), day_of_year); ASSERT_RAISES(Invalid, IsDaylightSavings(ArrayFromJSON(unit, times_seconds_precision))); - CheckScalarUnary("us_epiyear", unit, times_seconds_precision, int64(), us_epiyear); + CheckScalarUnary("us_year", unit, times_seconds_precision, int64(), us_year); CheckScalarUnary("iso_year", unit, times_seconds_precision, int64(), iso_year); CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), iso_week); CheckScalarUnary("us_week", unit, times_seconds_precision, int64(), us_week); @@ -510,7 +510,7 @@ TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) { {"year": 2262, "month": 4, "day": 13}])"); auto day_of_week = "[0, 6]"; auto day_of_year = "[263, 103]"; - auto us_epiyear = "[1677, 2262]"; + auto us_year = "[1677, 2262]"; auto iso_year = "[1677, 2262]"; auto iso_week = "[38, 15]"; auto us_week = "[38, 16]"; @@ -534,7 +534,7 @@ TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) { CheckScalarUnary("year_month_day", ArrayFromJSON(unit, times), year_month_day); CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week); CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year); - CheckScalarUnary("us_epiyear", unit, times, int64(), us_epiyear); + CheckScalarUnary("us_year", unit, times, int64(), us_year); CheckScalarUnary("iso_year", unit, times, int64(), iso_year); CheckScalarUnary("iso_week", unit, times, int64(), iso_week); CheckScalarUnary("us_week", unit, times, int64(), us_week); @@ -582,7 +582,7 @@ TEST_F(ScalarTemporalTest, TestZoned1) { std::string is_dst = "[false, false, false, false, false, false, false, false, false, false, false, " "false, false, false, false, false, null]"; - auto us_epiyear = + auto us_year = "[1969, 2000, 1898, 2033, 2020, 2020, 2020, 2009, 2009, 2009, 2010, 2005, 2005, " "2008, 2008, 2011, null]"; auto iso_year = @@ -620,7 +620,7 @@ TEST_F(ScalarTemporalTest, TestZoned1) { CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week); CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year); CheckScalarUnary("is_dst", unit, times, boolean(), is_dst); - CheckScalarUnary("us_epiyear", unit, times, int64(), us_epiyear); + CheckScalarUnary("us_year", unit, times, int64(), us_year); CheckScalarUnary("iso_year", unit, times, int64(), iso_year); CheckScalarUnary("iso_week", unit, times, int64(), iso_week); CheckScalarUnary("us_week", unit, times, int64(), us_week); @@ -664,7 +664,7 @@ TEST_F(ScalarTemporalTest, TestZoned2) { std::string is_dst = "[false, true, false, false, true, true, true, true, true, true, true, true, " "true, true, true, true, null]"; - auto us_epiyear = + auto us_year = "[1969, 2000, 1899, 2033, 2020, 2020, 2020, 2009, 2009, 2010, 2010, 2006, 2005, " "2008, 2008, 2012, null]"; auto iso_year = @@ -703,7 +703,7 @@ TEST_F(ScalarTemporalTest, TestZoned2) { CheckScalarUnary("day_of_week", unit, times_seconds_precision, int64(), day_of_week); CheckScalarUnary("day_of_year", unit, times_seconds_precision, int64(), day_of_year); CheckScalarUnary("is_dst", unit, times_seconds_precision, boolean(), is_dst); - CheckScalarUnary("us_epiyear", unit, times_seconds_precision, int64(), us_epiyear); + CheckScalarUnary("us_year", unit, times_seconds_precision, int64(), us_year); CheckScalarUnary("iso_year", unit, times_seconds_precision, int64(), iso_year); CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), iso_week); CheckScalarUnary("us_week", unit, times_seconds_precision, int64(), us_week); @@ -736,7 +736,7 @@ TEST_F(ScalarTemporalTest, TestNonexistentTimezone) { ASSERT_RAISES(Invalid, DayOfWeek(timestamp_array)); ASSERT_RAISES(Invalid, DayOfYear(timestamp_array)); ASSERT_RAISES(Invalid, IsDaylightSavings(timestamp_array)); - ASSERT_RAISES(Invalid, USEpiyear(timestamp_array)); + ASSERT_RAISES(Invalid, USYear(timestamp_array)); ASSERT_RAISES(Invalid, ISOYear(timestamp_array)); ASSERT_RAISES(Invalid, Week(timestamp_array)); ASSERT_RAISES(Invalid, ISOCalendar(timestamp_array)); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index c5c036ee85d..995470cfb1f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -437,8 +437,8 @@ struct ISOYear { // Wednesday in it. US epidemiological week starts on Sunday. template -struct USEpiyear { - explicit USEpiyear(const FunctionOptions* options, Localizer&& localizer) +struct USYear { + explicit USYear(const FunctionOptions* options, Localizer&& localizer) : localizer_(std::move(localizer)) {} template @@ -1377,7 +1377,7 @@ const FunctionDoc iso_year_doc{ "cannot be found in the timezone database."), {"values"}}; -const FunctionDoc us_epiyear_doc{ +const FunctionDoc us_year_doc{ "Extract US epidemiological year number", ("First week of US epidemiological year has the majority (4 or more) of\n" "it's days in January. Last week of US epidemiological year has the\n" @@ -1593,12 +1593,11 @@ void RegisterScalarTemporalUnary(FunctionRegistry* registry) { &iso_year_doc); DCHECK_OK(registry->AddFunction(std::move(iso_year))); - auto us_epiyear = - UnaryTemporalFactory::Make("us_epiyear", - int64(), - &us_epiyear_doc); - DCHECK_OK(registry->AddFunction(std::move(us_epiyear))); + auto us_year = + UnaryTemporalFactory::Make("us_year", int64(), + &us_year_doc); + DCHECK_OK(registry->AddFunction(std::move(us_year))); static const auto default_iso_week_options = WeekOptions::ISODefaults(); auto iso_week = diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index e8cb4bace9e..191f2658185 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1380,8 +1380,6 @@ For timestamps inputs with non-empty timezone, localized timestamp components wi +--------------------+------------+-------------------+---------------+----------------------------+-------+ | day_of_year | Unary | Temporal | Int64 | | | +--------------------+------------+-------------------+---------------+----------------------------+-------+ -| us_epiyear | Unary | Temporal | Int64 | | | -+--------------------+------------+-------------------+---------------+----------------------------+-------+ | hour | Unary | Timestamp, Time | Int64 | | | +--------------------+------------+-------------------+---------------+----------------------------+-------+ | is_dst | Unary | Timestamp | Boolean | | | @@ -1410,6 +1408,8 @@ For timestamps inputs with non-empty timezone, localized timestamp components wi +--------------------+------------+-------------------+---------------+----------------------------+-------+ | us_week | Unary | Temporal | Int64 | | \(4) | +--------------------+------------+-------------------+---------------+----------------------------+-------+ +| us_year | Unary | Temporal | Int64 | | \(4) | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ | week | Unary | Timestamp | Int64 | :struct:`WeekOptions` | \(5) | +--------------------+------------+-------------------+---------------+----------------------------+-------+ | year | Unary | Temporal | Int64 | | | diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 05972fbd402..fd362bf6e68 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -403,8 +403,8 @@ Temporal Component Extraction quarter second subsecond - us_epiyear us_week + us_year week year year_month_day