Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/api_scalar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,7 @@ SCALAR_EAGER_UNARY(Quarter, "quarter")
SCALAR_EAGER_UNARY(Second, "second")
SCALAR_EAGER_UNARY(Subsecond, "subsecond")
SCALAR_EAGER_UNARY(USWeek, "us_week")
SCALAR_EAGER_UNARY(USYear, "us_year")
SCALAR_EAGER_UNARY(Year, "year")

Result<Datum> AssumeTimezone(const Datum& arg, AssumeTimezoneOptions options,
Expand Down
14 changes: 14 additions & 0 deletions cpp/src/arrow/compute/api_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,20 @@ ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = NUL
ARROW_EXPORT
Result<Datum> ISOYear(const Datum& values, ExecContext* ctx = NULLPTR);

/// \brief USYear returns US epidemiological year number for each element of `values`.
/// First week of US epidemiological year has the majority (4 or more) of its
/// days in January. Last week of US epidemiological year has the year's last
/// Wednesday in it. US epidemiological week starts on Sunday.
///
/// \param[in] values input to extract US epidemiological year from
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> USYear(const Datum& values, ExecContext* ctx = NULLPTR);

/// \brief ISOWeek returns ISO week of year number for each element of `values`.
/// First ISO week has the majority (4 or more) of its days in January.
/// ISO week starts on Monday. Year can have 52 or 53 weeks.
Expand Down
16 changes: 16 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ class ScalarTemporalTest : public ::testing::Test {
std::string day_of_week = "[3, 1, 6, 2, 2, 1, 0, 3, 4, 6, 0, 6, 5, 6, 0, 6, null]";
std::string day_of_year =
"[1, 60, 1, 138, 1, 365, 364, 365, 1, 3, 4, 1, 365, 363, 364, 1, null]";
std::string us_year =
"[1969, 2000, 1899, 2033, 2020, 2020, 2020, 2009, 2009, 2010, 2010, 2006, 2005, "
"2008, 2008, 2012, null]";
std::string iso_year =
"[1970, 2000, 1898, 2033, 2020, 2020, 2020, 2009, 2009, 2009, 2010, 2005, "
"2005, 2008, 2009, 2011, null]";
Expand Down Expand Up @@ -411,6 +414,7 @@ TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionAllTemporalTypes) {
CheckScalarUnary("year_month_day", ArrayFromJSON(unit, sample), year_month_day);
CheckScalarUnary("day_of_week", unit, sample, int64(), day_of_week);
CheckScalarUnary("day_of_year", unit, sample, int64(), day_of_year);
CheckScalarUnary("us_year", unit, sample, int64(), us_year);
CheckScalarUnary("iso_year", unit, sample, int64(), iso_year);
CheckScalarUnary("iso_week", unit, sample, int64(), iso_week);
CheckScalarUnary("us_week", unit, sample, int64(), us_week);
Expand Down Expand Up @@ -477,6 +481,7 @@ TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionWithDifferentUnits) {
CheckScalarUnary("day_of_year", unit, times_seconds_precision, int64(), day_of_year);
ASSERT_RAISES(Invalid,
IsDaylightSavings(ArrayFromJSON(unit, times_seconds_precision)));
CheckScalarUnary("us_year", unit, times_seconds_precision, int64(), us_year);
CheckScalarUnary("iso_year", unit, times_seconds_precision, int64(), iso_year);
CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), iso_week);
CheckScalarUnary("us_week", unit, times_seconds_precision, int64(), us_week);
Expand Down Expand Up @@ -505,6 +510,7 @@ TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) {
{"year": 2262, "month": 4, "day": 13}])");
auto day_of_week = "[0, 6]";
auto day_of_year = "[263, 103]";
auto us_year = "[1677, 2262]";
auto iso_year = "[1677, 2262]";
auto iso_week = "[38, 15]";
auto us_week = "[38, 16]";
Expand All @@ -528,6 +534,7 @@ TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) {
CheckScalarUnary("year_month_day", ArrayFromJSON(unit, times), year_month_day);
CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week);
CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year);
CheckScalarUnary("us_year", unit, times, int64(), us_year);
CheckScalarUnary("iso_year", unit, times, int64(), iso_year);
CheckScalarUnary("iso_week", unit, times, int64(), iso_week);
CheckScalarUnary("us_week", unit, times, int64(), us_week);
Expand Down Expand Up @@ -575,6 +582,9 @@ TEST_F(ScalarTemporalTest, TestZoned1) {
std::string is_dst =
"[false, false, false, false, false, false, false, false, false, false, false, "
"false, false, false, false, false, null]";
auto us_year =
"[1969, 2000, 1898, 2033, 2020, 2020, 2020, 2009, 2009, 2009, 2010, 2005, 2005, "
"2008, 2008, 2011, null]";
auto iso_year =
"[1970, 2000, 1898, 2033, 2020, 2020, 2019, 2009, 2009, 2009, 2009, 2005, 2005, "
"2008, 2008, 2011, null]";
Expand Down Expand Up @@ -610,6 +620,7 @@ TEST_F(ScalarTemporalTest, TestZoned1) {
CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week);
CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year);
CheckScalarUnary("is_dst", unit, times, boolean(), is_dst);
CheckScalarUnary("us_year", unit, times, int64(), us_year);
CheckScalarUnary("iso_year", unit, times, int64(), iso_year);
CheckScalarUnary("iso_week", unit, times, int64(), iso_week);
CheckScalarUnary("us_week", unit, times, int64(), us_week);
Expand Down Expand Up @@ -653,6 +664,9 @@ TEST_F(ScalarTemporalTest, TestZoned2) {
std::string is_dst =
"[false, true, false, false, true, true, true, true, true, true, true, true, "
"true, true, true, true, null]";
auto us_year =
"[1969, 2000, 1899, 2033, 2020, 2020, 2020, 2009, 2009, 2010, 2010, 2006, 2005, "
"2008, 2008, 2012, null]";
auto iso_year =
"[1970, 2000, 1898, 2033, 2020, 2020, 2020, 2009, 2009, 2009, 2010, 2005, 2005, "
"2008, 2009, 2011, null]";
Expand Down Expand Up @@ -689,6 +703,7 @@ TEST_F(ScalarTemporalTest, TestZoned2) {
CheckScalarUnary("day_of_week", unit, times_seconds_precision, int64(), day_of_week);
CheckScalarUnary("day_of_year", unit, times_seconds_precision, int64(), day_of_year);
CheckScalarUnary("is_dst", unit, times_seconds_precision, boolean(), is_dst);
CheckScalarUnary("us_year", unit, times_seconds_precision, int64(), us_year);
CheckScalarUnary("iso_year", unit, times_seconds_precision, int64(), iso_year);
CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), iso_week);
CheckScalarUnary("us_week", unit, times_seconds_precision, int64(), us_week);
Expand Down Expand Up @@ -721,6 +736,7 @@ TEST_F(ScalarTemporalTest, TestNonexistentTimezone) {
ASSERT_RAISES(Invalid, DayOfWeek(timestamp_array));
ASSERT_RAISES(Invalid, DayOfYear(timestamp_array));
ASSERT_RAISES(Invalid, IsDaylightSavings(timestamp_array));
ASSERT_RAISES(Invalid, USYear(timestamp_array));
ASSERT_RAISES(Invalid, ISOYear(timestamp_array));
ASSERT_RAISES(Invalid, Week(timestamp_array));
ASSERT_RAISES(Invalid, ISOCalendar(timestamp_array));
Expand Down
42 changes: 42 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,32 @@ struct ISOYear {
Localizer localizer_;
};

// ----------------------------------------------------------------------
// Extract US epidemiological year values from temporal types
//
// First week of US epidemiological year has the majority (4 or more) of its
// days in January. Last week of US epidemiological year has the year's last
// Wednesday in it. US epidemiological week starts on Sunday.

template <typename Duration, typename Localizer>
struct USYear {
explicit USYear(const FunctionOptions* options, Localizer&& localizer)
: localizer_(std::move(localizer)) {}

template <typename T, typename Arg0>
T Call(KernelContext*, Arg0 arg, Status*) const {
const auto t = floor<days>(localizer_.template ConvertTimePoint<Duration>(arg));
auto y = year_month_day{t + days{3}}.year();
auto start = localizer_.ConvertDays((y - years{1}) / dec / wed[last]) + (mon - thu);
if (t < start) {
--y;
}
return static_cast<T>(static_cast<int32_t>(y));
}

Localizer localizer_;
};

// ----------------------------------------------------------------------
// Extract week from temporal types
//
Expand Down Expand Up @@ -1351,6 +1377,16 @@ const FunctionDoc iso_year_doc{
"cannot be found in the timezone database."),
{"values"}};

// User-facing documentation for the "us_year" compute function
// (summary, long description, argument names).
const FunctionDoc us_year_doc{
    "Extract US epidemiological year number",
    ("First week of US epidemiological year has the majority (4 or more) of\n"
     "its days in January. Last week of US epidemiological year has the\n"
     "year's last Wednesday in it. US epidemiological week starts on Sunday.\n"
     "Null values emit null.\n"
     "An error is returned if the values have a defined timezone but it\n"
     "cannot be found in the timezone database."),
    {"values"}};

const FunctionDoc iso_week_doc{
"Extract ISO week of year number",
("First ISO week has the majority (4 or more) of its days in January.\n"
Expand Down Expand Up @@ -1557,6 +1593,12 @@ void RegisterScalarTemporalUnary(FunctionRegistry* registry) {
&iso_year_doc);
DCHECK_OK(registry->AddFunction(std::move(iso_year)));

auto us_year =
UnaryTemporalFactory<USYear, TemporalComponentExtract,
Int64Type>::Make<WithDates, WithTimestamps>("us_year", int64(),
&us_year_doc);
DCHECK_OK(registry->AddFunction(std::move(us_year)));

static const auto default_iso_week_options = WeekOptions::ISODefaults();
auto iso_week =
UnaryTemporalFactory<Week, TemporalComponentExtractWeek, Int64Type>::Make<
Expand Down
2 changes: 2 additions & 0 deletions docs/source/cpp/compute.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1408,6 +1408,8 @@ For timestamps inputs with non-empty timezone, localized timestamp components wi
+--------------------+------------+-------------------+---------------+----------------------------+-------+
| us_week | Unary | Temporal | Int64 | | \(4) |
+--------------------+------------+-------------------+---------------+----------------------------+-------+
| us_year | Unary | Temporal | Int64 | | \(4) |
+--------------------+------------+-------------------+---------------+----------------------------+-------+
| week | Unary | Timestamp | Int64 | :struct:`WeekOptions` | \(5) |
+--------------------+------------+-------------------+---------------+----------------------------+-------+
| year | Unary | Temporal | Int64 | | |
Expand Down
1 change: 1 addition & 0 deletions docs/source/python/api/compute.rst
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ Temporal Component Extraction
second
subsecond
us_week
us_year
week
year
year_month_day
Expand Down