From 46498490bbc789b17d36c9f29374a4c3065e9ca2 Mon Sep 17 00:00:00 2001 From: Rok Date: Wed, 25 Aug 2021 15:40:21 +0200 Subject: [PATCH 1/7] Strftime default changes. --- cpp/src/arrow/compute/api_scalar.h | 2 +- cpp/src/arrow/compute/kernels/scalar_temporal.cc | 6 +++--- cpp/src/arrow/compute/kernels/scalar_temporal_test.cc | 11 +++++------ python/pyarrow/tests/test_compute.py | 4 ++-- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 2cbc0fde2b2..e959884b233 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -185,7 +185,7 @@ class ARROW_EXPORT StrftimeOptions : public FunctionOptions { constexpr static char const kTypeName[] = "StrftimeOptions"; - constexpr static const char* kDefaultFormat = "%Y-%m-%dT%H:%M:%SZ"; + constexpr static const char* kDefaultFormat = "%Y-%m-%dT%H:%M:%S"; /// The desired format string. std::string format; diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index 44c7f75a038..9b9479eb116 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -476,11 +476,11 @@ struct Strftime { static Result Make(KernelContext* ctx, const DataType& type) { const StrftimeOptions& options = StrftimeState::Get(ctx); - const auto& timezone = GetInputTimezone(type); + auto timezone = GetInputTimezone(type); if (timezone.empty()) { - return Status::Invalid( - "Timestamps without a time zone cannot be reliably formatted."); + timezone = "UTC"; } + ARROW_ASSIGN_OR_RAISE(const time_zone* tz, LocateZone(timezone)); ARROW_ASSIGN_OR_RAISE(std::locale locale, GetLocale(options.locale)); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index d8199089328..afa04da2a22 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -394,7 +394,7 @@ TEST_F(ScalarTemporalTest, Strftime) { const char* nanoseconds = R"(["1970-01-01T00:00:59.123456789", null])"; const char* default_seconds = R"( - ["1970-01-01T00:00:59Z", "2021-08-18T15:11:50Z", null])"; + ["1970-01-01T00:00:59", "2021-08-18T15:11:50", null])"; const char* string_seconds = R"( ["1970-01-01T00:00:59+0000", "2021-08-18T15:11:50+0000", null])"; const char* string_milliseconds = R"(["1970-01-01T00:00:59.123+0000", null])"; @@ -414,12 +414,11 @@ TEST_F(ScalarTemporalTest, Strftime) { } TEST_F(ScalarTemporalTest, StrftimeNoTimezone) { + auto options_default = StrftimeOptions(); const char* seconds = R"(["1970-01-01T00:00:59", null])"; auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND), seconds); - EXPECT_RAISES_WITH_MESSAGE_THAT( - Invalid, - testing::HasSubstr("Timestamps without a time zone cannot be reliably formatted"), - Strftime(arr, StrftimeOptions())); + CheckScalarUnary("strftime", timestamp(TimeUnit::SECOND), seconds, utf8(), seconds, + &options_default); } TEST_F(ScalarTemporalTest, StrftimeInvalidTimezone) { @@ -440,7 +439,7 @@ TEST_F(ScalarTemporalTest, StrftimeCLocale) { const char* microseconds = R"(["1970-01-01T00:00:59.123456", null])"; const char* nanoseconds = R"(["1970-01-01T00:00:59.123456789", null])"; - const char* default_seconds = R"(["1970-01-01T00:00:59Z", null])"; + const char* default_seconds = R"(["1970-01-01T00:00:59", null])"; const char* string_seconds = R"(["1970-01-01T00:00:59+0000", null])"; const char* string_milliseconds = R"(["1970-01-01T00:00:59.123+0000", null])"; const char* string_microseconds = R"(["1970-01-01T05:30:59.123456+0530", null])"; diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index bbef46f2477..d4f123d2b02 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1499,7 +1499,7 @@ def _fix_timestamp(s): # Default format tsa = pa.array(ts, type=pa.timestamp("s", timezone)) result = pc.strftime(tsa, options=pc.StrftimeOptions()) - expected = pa.array(_fix_timestamp(ts.strftime("%Y-%m-%dT%H:%M:%SZ"))) + expected = pa.array(_fix_timestamp(ts.strftime("%Y-%m-%dT%H:%M:%S"))) assert result.equals(expected) # Pandas %S is equivalent to %S in arrow for unit="s" @@ -1520,7 +1520,7 @@ def _fix_timestamp(s): tsa = pa.array(ts, type=pa.timestamp("s", timezone)) options = pc.StrftimeOptions("%Y-%m-%dT%H:%M:%SZ", "C") result = pc.strftime(tsa, options=options) - expected = pa.array(_fix_timestamp(ts.strftime("%Y-%m-%dT%H:%M:%SZ"))) + expected = pa.array(_fix_timestamp(ts.strftime("%Y-%m-%dT%H:%M:%S"))) assert result.equals(expected) for unit in ["s", "ms", "us", "ns"]: From 88e520a50f139e0e8eb9070162bb5cba6cbdbaa5 Mon Sep 17 00:00:00 2001 From: Rok Date: Wed, 25 Aug 2021 21:40:38 +0200 Subject: [PATCH 2/7] Review feedback. --- .../arrow/compute/kernels/scalar_temporal.cc | 8 ++++++ .../compute/kernels/scalar_temporal_test.cc | 7 +++++ docs/source/cpp/compute.rst | 28 +++++++++++++------ python/pyarrow/_compute.pyx | 2 +- python/pyarrow/tests/test_compute.py | 23 +++++++++------ 5 files changed, 50 insertions(+), 18 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index 9b9479eb116..7c853e53fb6 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -478,6 +478,10 @@ struct Strftime { auto timezone = GetInputTimezone(type); if (timezone.empty()) { + if ((options.format.find("%z") != std::string::npos) || + (options.format.find("%Z") != std::string::npos)) { + return Status::Invalid("Timezone not present, cannot print: ", options.format); + } timezone = "UTC"; } @@ -836,6 +840,10 @@ const FunctionDoc strftime_doc{ "Format timestamps according to a format string", ("For each input timestamp, emit a formatted string.\n" "The time format string and locale can be set using StrftimeOptions.\n" + "Output precision of seconds %S flag depends on the input timestamp precision. " + "Timestamps with second precision are represented as integers while microsecond " + "precision timestamps are represented as a floating point number with 6 decimal " + "points." "An error is returned if the timestamps don't have a defined timezone,\n" "or if the timezone cannot be found in the timezone database."), {"timestamps"}, diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index afa04da2a22..ef9f97130d7 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -417,8 +417,15 @@ TEST_F(ScalarTemporalTest, StrftimeNoTimezone) { auto options_default = StrftimeOptions(); const char* seconds = R"(["1970-01-01T00:00:59", null])"; auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND), seconds); + CheckScalarUnary("strftime", timestamp(TimeUnit::SECOND), seconds, utf8(), seconds, &options_default); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, testing::HasSubstr("Invalid: Timezone not present, cannot print"), + Strftime(arr, StrftimeOptions("%Y-%m-%dT%H:%M:%S%z"))); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, testing::HasSubstr("Invalid: Timezone not present, cannot print"), + Strftime(arr, StrftimeOptions("%Y-%m-%dT%H:%M:%S%Z"))); } TEST_F(ScalarTemporalTest, StrftimeInvalidTimezone) { diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 7263d77acf2..ece9fb8860f 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1105,19 +1105,29 @@ number of input and output types. The type to cast to can be passed in a :struct:`CastOptions` instance. As an alternative, the same service is provided by a concrete function :func:`~arrow::compute::Cast`. -+--------------------------+------------+--------------------+------------------+------------------------------+ -| Function name | Arity | Input types | Output type | Options class | -+==========================+============+====================+==================+==============================+ -| cast | Unary | Many | Variable | :struct:`CastOptions` | -+--------------------------+------------+--------------------+------------------+------------------------------+ -| strftime | Unary | Timestamp | String | :struct:`StrftimeOptions` | -+--------------------------+------------+--------------------+------------------+------------------------------+ -| strptime | Unary | String-like | Timestamp | :struct:`StrptimeOptions` | -+--------------------------+------------+--------------------+------------------+------------------------------+ ++-----------------+------------+--------------------+------------------+------------------------------+-------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++=================+============+====================+==================+==============================+=======+ +| cast | Unary | Many | Variable | :struct:`CastOptions` | | ++-----------------+------------+--------------------+------------------+------------------------------+-------+ +| strftime | Unary | Timestamp | String | :struct:`StrftimeOptions` | \(1) | ++-----------------+------------+--------------------+------------------+------------------------------+-------+ +| strptime | Unary | String-like | Timestamp | :struct:`StrptimeOptions` | | ++-----------------+------------+--------------------+------------------+------------------------------+-------+ The conversions available with ``cast`` are listed below. In all cases, a null input value is converted into a null output value. +* \(1) Output precision of seconds (``%S``) flag depends on the input timestamp + precision. If the number of seconds can not be exactly represented with seconds, + then the format is a decimal floating point number with a fixed format and a + precision matching that of the precision of the input. Note precision increases + three decimal points points per step going from seconds to nanoseconds. + The character for the decimal point is localized according to the locale. + See `detailed formatting documentation`_ for descriptions of other flags. + +.. _detailed formatting documentation: https://howardhinnant.github.io/date/date.html#to_stream_formatting + **Truth value extraction** +-----------------------------+------------------------------------+--------------+ diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 29c579f85a9..aaf4c9f2916 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -989,7 +989,7 @@ cdef class _StrftimeOptions(FunctionOptions): class StrftimeOptions(_StrftimeOptions): - def __init__(self, format="%Y-%m-%dT%H:%M:%SZ", locale="C"): + def __init__(self, format="%Y-%m-%dT%H:%M:%S", locale="C"): self._set_options(format, locale) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index d4f123d2b02..c0b35ba66c6 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1518,18 +1518,25 @@ def _fix_timestamp(s): # Test setting locale tsa = pa.array(ts, type=pa.timestamp("s", timezone)) - options = pc.StrftimeOptions("%Y-%m-%dT%H:%M:%SZ", "C") + options = pc.StrftimeOptions("%Y-%m-%dT%H:%M:%S", "C") result = pc.strftime(tsa, options=options) expected = pa.array(_fix_timestamp(ts.strftime("%Y-%m-%dT%H:%M:%S"))) assert result.equals(expected) - for unit in ["s", "ms", "us", "ns"]: - tsa = pa.array(ts, type=pa.timestamp(unit)) - for fmt in formats: - with pytest.raises(pa.ArrowInvalid, - match="Timestamps without a time zone " - "cannot be reliably formatted"): - pc.strftime(tsa, options=pc.StrftimeOptions(fmt)) + # Test timestamps without timezone + fmt = "%Y-%m-%dT%H:%M:%S" + ts = pd.to_datetime(times) + tsa = pa.array(ts, type=pa.timestamp("s")) + result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt)) + expected = pa.array(_fix_timestamp(ts.strftime(fmt))) + + assert result.equals(expected) + with pytest.raises(pa.ArrowInvalid, + match="Timezone not present, cannot print:"): + pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) + with pytest.raises(pa.ArrowInvalid, + match="Timezone not present, cannot print:"): + pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%z")) def _check_datetime_components(timestamps, timezone=None): From 644470ef15fa67be86f024168dbcf2aae95466cc Mon Sep 17 00:00:00 2001 From: Rok Date: Fri, 27 Aug 2021 01:15:05 +0200 Subject: [PATCH 3/7] Review feedback. --- .../arrow/compute/kernels/scalar_temporal.cc | 11 +++++---- .../compute/kernels/scalar_temporal_test.cc | 6 +++-- python/pyarrow/tests/test_compute.py | 24 +++++++++++++------ 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index 7c853e53fb6..a84a561cff5 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -480,7 +480,8 @@ struct Strftime { if (timezone.empty()) { if ((options.format.find("%z") != std::string::npos) || (options.format.find("%Z") != std::string::npos)) { - return Status::Invalid("Timezone not present, cannot print: ", options.format); + return Status::Invalid("Timezone not present, cannot convert to string: ", + options.format); } timezone = "UTC"; } @@ -840,10 +841,10 @@ const FunctionDoc strftime_doc{ "Format timestamps according to a format string", ("For each input timestamp, emit a formatted string.\n" "The time format string and locale can be set using StrftimeOptions.\n" - "Output precision of seconds %S flag depends on the input timestamp precision. " - "Timestamps with second precision are represented as integers while microsecond " - "precision timestamps are represented as a floating point number with 6 decimal " - "points." + "Output precision of %S (seconds) flag depends on the input timestamp precision. " + "Timestamps with second precision are represented as integers while milliseconds, " + "microsecond and nanoseconds are represented as fixed floating point numbers with " + "3, 6 and 9 decimal points respectively." "An error is returned if the timestamps don't have a defined timezone,\n" "or if the timezone cannot be found in the timezone database."), {"timestamps"}, diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index ef9f97130d7..32e46ae5818 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -421,10 +421,12 @@ TEST_F(ScalarTemporalTest, StrftimeNoTimezone) { CheckScalarUnary("strftime", timestamp(TimeUnit::SECOND), seconds, utf8(), seconds, &options_default); EXPECT_RAISES_WITH_MESSAGE_THAT( - Invalid, testing::HasSubstr("Invalid: Timezone not present, cannot print"), + Invalid, + testing::HasSubstr("Invalid: Timezone not present, cannot convert to string"), Strftime(arr, StrftimeOptions("%Y-%m-%dT%H:%M:%S%z"))); EXPECT_RAISES_WITH_MESSAGE_THAT( - Invalid, testing::HasSubstr("Invalid: Timezone not present, cannot print"), + Invalid, + testing::HasSubstr("Invalid: Timezone not present, cannot convert to string"), Strftime(arr, StrftimeOptions("%Y-%m-%dT%H:%M:%S%Z"))); } diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index c0b35ba66c6..9791062d403 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1496,10 +1496,18 @@ def _fix_timestamp(s): expected = pa.array(_fix_timestamp(ts.strftime(fmt))) assert result.equals(expected) + fmt = "%Y-%m-%dT%H:%M:%S" + # Default format tsa = pa.array(ts, type=pa.timestamp("s", timezone)) result = pc.strftime(tsa, options=pc.StrftimeOptions()) - expected = pa.array(_fix_timestamp(ts.strftime("%Y-%m-%dT%H:%M:%S"))) + expected = pa.array(_fix_timestamp(ts.strftime(fmt))) + assert result.equals(expected) + + # Default format plus timezone + tsa = pa.array(ts, type=pa.timestamp("s", timezone)) + result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) + expected = pa.array(_fix_timestamp(ts.strftime(fmt + "%Z"))) assert result.equals(expected) # Pandas %S is equivalent to %S in arrow for unit="s" @@ -1518,9 +1526,9 @@ def _fix_timestamp(s): # Test setting locale tsa = pa.array(ts, type=pa.timestamp("s", timezone)) - options = pc.StrftimeOptions("%Y-%m-%dT%H:%M:%S", "C") + options = pc.StrftimeOptions(fmt, "C") result = pc.strftime(tsa, options=options) - expected = pa.array(_fix_timestamp(ts.strftime("%Y-%m-%dT%H:%M:%S"))) + expected = pa.array(_fix_timestamp(ts.strftime(fmt))) assert result.equals(expected) # Test timestamps without timezone @@ -1531,11 +1539,13 @@ def _fix_timestamp(s): expected = pa.array(_fix_timestamp(ts.strftime(fmt))) assert result.equals(expected) - with pytest.raises(pa.ArrowInvalid, - match="Timezone not present, cannot print:"): + with pytest.raises( + pa.ArrowInvalid, + match="Timezone not present, cannot convert to string:"): pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) - with pytest.raises(pa.ArrowInvalid, - match="Timezone not present, cannot print:"): + with pytest.raises( + pa.ArrowInvalid, + match="Timezone not present, cannot convert to string:"): pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%z")) From 1b4a82c2630430a932e9d890c0092bd08b255848 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 30 Aug 2021 18:47:33 +0200 Subject: [PATCH 4/7] Apply suggestions from code review --- cpp/src/arrow/compute/kernels/scalar_temporal.cc | 6 +++--- docs/source/cpp/compute.rst | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index a84a561cff5..06b137726c9 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -840,12 +840,12 @@ const FunctionDoc subsecond_doc{ const FunctionDoc strftime_doc{ "Format timestamps according to a format string", ("For each input timestamp, emit a formatted string.\n" - "The time format string and locale can be set using StrftimeOptions.\n" + "The time format string and locale can be set using StrftimeOptions. " "Output precision of %S (seconds) flag depends on the input timestamp precision. " "Timestamps with second precision are represented as integers while milliseconds, " "microsecond and nanoseconds are represented as fixed floating point numbers with " - "3, 6 and 9 decimal points respectively." - "An error is returned if the timestamps don't have a defined timezone,\n" + "3, 6 and 9 decimal places respectively.\n" + "An error is returned if the timestamps don't have a defined timezone," "or if the timezone cannot be found in the timezone database."), {"timestamps"}, "StrftimeOptions"}; diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index ece9fb8860f..4f2507cb467 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1122,7 +1122,7 @@ null input value is converted into a null output value. precision. If the number of seconds can not be exactly represented with seconds, then the format is a decimal floating point number with a fixed format and a precision matching that of the precision of the input. Note precision increases - three decimal points points per step going from seconds to nanoseconds. + three decimal places per step going from seconds to nanoseconds. The character for the decimal point is localized according to the locale. See `detailed formatting documentation`_ for descriptions of other flags. From b68fe04349af81c03dd38b5b4e89340aa5670360 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 31 Aug 2021 16:09:24 +0200 Subject: [PATCH 5/7] Update cpp/src/arrow/compute/kernels/scalar_temporal.cc Co-authored-by: Joris Van den Bossche --- cpp/src/arrow/compute/kernels/scalar_temporal.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index 06b137726c9..f315f204cbd 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -480,7 +480,7 @@ struct Strftime { if (timezone.empty()) { if ((options.format.find("%z") != std::string::npos) || (options.format.find("%Z") != std::string::npos)) { - return Status::Invalid("Timezone not present, cannot convert to string: ", + return Status::Invalid("Timezone not present, cannot convert to string with timezone: ", options.format); } timezone = "UTC"; From a3cb389cb849b0149fc33b48d7358c5a43ade5b7 Mon Sep 17 00:00:00 2001 From: Rok Date: Tue, 31 Aug 2021 16:33:50 +0200 Subject: [PATCH 6/7] Review feedback. --- .../arrow/compute/kernels/scalar_temporal.cc | 43 ++++++++++--------- docs/source/cpp/compute.rst | 10 ++--- python/pyarrow/tests/test_compute.py | 4 +- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index f315f204cbd..2e0e8fc848a 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -480,8 +480,9 @@ struct Strftime { if (timezone.empty()) { if ((options.format.find("%z") != std::string::npos) || (options.format.find("%Z") != std::string::npos)) { - return Status::Invalid("Timezone not present, cannot convert to string with timezone: ", - options.format); + return Status::Invalid( + "Timezone not present, cannot convert to string with timezone: ", + options.format); } timezone = "UTC"; } @@ -742,18 +743,18 @@ std::shared_ptr MakeSimpleUnaryTemporal( const FunctionDoc year_doc{ "Extract year from timestamp", - "Returns an error if timestamp has a defined timezone. Null values return null.", + "Returns an error if timezone is specified but not found. Null values return null.", {"values"}}; const FunctionDoc month_doc{ "Extract month number", ("Month is encoded as January=1, December=12.\n" - "Returns an error if timestamp has a defined timezone. Null values return null."), + "Returns an error if timezone is specified but not found. Null values return null."), {"values"}}; const FunctionDoc day_doc{ "Extract day number", - "Returns an error if timestamp has a defined timezone. Null values return null.", + "Returns an error if timezone is specified but not found. Null values return null.", {"values"}}; const FunctionDoc day_of_week_doc{ @@ -763,78 +764,78 @@ const FunctionDoc day_of_week_doc{ "DayOfWeekOptions.week_start can be used to set another starting day using ISO " "convention (Monday=1, Sunday=7). Day numbering can start with 0 or 1 using " "DayOfWeekOptions.one_based_numbering parameter.\n" - "Returns an error if timestamp has a defined timezone. Null values return null."), + "Returns an error if timezone is specified but not found. Null values return null."), {"values"}, "DayOfWeekOptions"}; const FunctionDoc day_of_year_doc{ "Extract number of day of year", ("January 1st maps to day number 1, February 1st to 32, etc.\n" - "Returns an error if timestamp has a defined timezone. Null values return null."), + "Returns an error if timezone is specified but not found. Null values return null."), {"values"}}; const FunctionDoc iso_year_doc{ "Extract ISO year number", ("First week of an ISO year has the majority (4 or more) of its days in January." - "Returns an error if timestamp has a defined timezone. Null values return null."), + "Returns an error if timezone is specified but not found. Null values return null."), {"values"}}; const FunctionDoc iso_week_doc{ "Extract ISO week of year number", ("First ISO week has the majority (4 or more) of its days in January.\n" "Week of the year starts with 1 and can run up to 53.\n" - "Returns an error if timestamp has a defined timezone. Null values return null."), + "Returns an error if timezone is specified but not found. Null values return null."), {"values"}}; const FunctionDoc iso_calendar_doc{ "Extract (ISO year, ISO week, ISO day of week) struct", ("ISO week starts on Monday denoted by 1 and ends on Sunday denoted by 7.\n" - "Returns an error if timestamp has a defined timezone. Null values return null."), + "Returns an error if timezone is specified but not found. Null values return null."), {"values"}}; const FunctionDoc quarter_doc{ "Extract quarter of year number", ("First quarter maps to 1 and forth quarter maps to 4.\n" - "Returns an error if timestamp has a defined timezone. Null values return null."), + "Returns an error if timezone is specified but not found. Null values return null."), {"values"}}; const FunctionDoc hour_doc{ "Extract hour value", - "Returns an error if timestamp has a defined timezone. Null values return null.", + "Returns an error if timezone is specified but not found. Null values return null.", {"values"}}; const FunctionDoc minute_doc{ "Extract minute values", - "Returns an error if timestamp has a defined timezone. Null values return null.", + "Returns an error if timezone is specified but not found. Null values return null.", {"values"}}; const FunctionDoc second_doc{ "Extract second values", - "Returns an error if timestamp has a defined timezone. Null values return null.", + "Returns an error if timezone is specified but not found. Null values return null.", {"values"}}; const FunctionDoc millisecond_doc{ "Extract millisecond values", ("Millisecond returns number of milliseconds since the last full second.\n" - "Returns an error if timestamp has a defined timezone. Null values return null."), + "Returns an error if timezone is specified but not found. Null values return null."), {"values"}}; const FunctionDoc microsecond_doc{ "Extract microsecond values", ("Millisecond returns number of microseconds since the last full millisecond.\n" - "Returns an error if timestamp has a defined timezone. Null values return null."), + "Returns an error if timezone is specified but not found. Null values return null."), {"values"}}; const FunctionDoc nanosecond_doc{ "Extract nanosecond values", ("Nanosecond returns number of nanoseconds since the last full microsecond.\n" - "Returns an error if timestamp has a defined timezone. Null values return null."), + "Returns an error if timezone is specified but not found. Null values return null."), {"values"}}; const FunctionDoc subsecond_doc{ "Extract subsecond values", ("Subsecond returns the fraction of a second since the last full second.\n" - "Returns an error if timestamp has a defined timezone. Null values return null."), + "Returns an error if timezone is specified but not found. Null values return null."), {"values"}}; const FunctionDoc strftime_doc{ @@ -844,9 +845,9 @@ const FunctionDoc strftime_doc{ "Output precision of %S (seconds) flag depends on the input timestamp precision. " "Timestamps with second precision are represented as integers while milliseconds, " "microsecond and nanoseconds are represented as fixed floating point numbers with " - "3, 6 and 9 decimal places respectively.\n" - "An error is returned if the timestamps don't have a defined timezone," - "or if the timezone cannot be found in the timezone database."), + "3, 6 and 9 decimal places respectively. To obtain integer seconds, cast to " + "timestamp with second resolution.\n" + "Returns an error if timezone or locale are not found. Null values return null."), {"timestamps"}, "StrftimeOptions"}; diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 4f2507cb467..f4ef440f100 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1118,11 +1118,11 @@ provided by a concrete function :func:`~arrow::compute::Cast`. The conversions available with ``cast`` are listed below. In all cases, a null input value is converted into a null output value. -* \(1) Output precision of seconds (``%S``) flag depends on the input timestamp - precision. If the number of seconds can not be exactly represented with seconds, - then the format is a decimal floating point number with a fixed format and a - precision matching that of the precision of the input. Note precision increases - three decimal places per step going from seconds to nanoseconds. +* \(1) Output precision of ``%S`` (seconds) flag depends on the input timestamp + precision. Timestamps with second precision are represented as integers while + milliseconds, microsecond and nanoseconds are represented as fixed floating + point numbers with 3, 6 and 9 decimal places respectively. To obtain integer + seconds, cast to timestamp with second resolution. The character for the decimal point is localized according to the locale. See `detailed formatting documentation`_ for descriptions of other flags. diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 9791062d403..334a2a7bda3 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1541,11 +1541,11 @@ def _fix_timestamp(s): assert result.equals(expected) with pytest.raises( pa.ArrowInvalid, - match="Timezone not present, cannot convert to string:"): + match="Timezone not present, cannot convert to string"): pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) with pytest.raises( pa.ArrowInvalid, - match="Timezone not present, cannot convert to string:"): + match="Timezone not present, cannot convert to string"): pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%z")) From 0426860492da36ad85b34ccd8dbd458947e948e1 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 6 Sep 2021 17:28:45 +0200 Subject: [PATCH 7/7] Add explicit line wraps, rephrase some function docs --- .../arrow/compute/kernels/scalar_temporal.cc | 88 +++++++++++++------ 1 file changed, 61 insertions(+), 27 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index 2e0e8fc848a..d70411f8338 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -743,111 +743,145 @@ std::shared_ptr MakeSimpleUnaryTemporal( const FunctionDoc year_doc{ "Extract year from timestamp", - "Returns an error if timezone is specified but not found. Null values return null.", + ("Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc month_doc{ "Extract month number", ("Month is encoded as January=1, December=12.\n" - "Returns an error if timezone is specified but not found. Null values return null."), + "Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc day_doc{ "Extract day number", - "Returns an error if timezone is specified but not found. Null values return null.", + ("Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc day_of_week_doc{ "Extract day of the week number", - ("By default, the week starts on Monday represented by 0 and ends on Sunday " + ("By default, the week starts on Monday represented by 0 and ends on Sunday\n" "represented by 6.\n" - "DayOfWeekOptions.week_start can be used to set another starting day using ISO " - "convention (Monday=1, Sunday=7). Day numbering can start with 0 or 1 using " - "DayOfWeekOptions.one_based_numbering parameter.\n" - "Returns an error if timezone is specified but not found. Null values return null."), + "`DayOfWeekOptions.week_start` can be used to set another starting day using\n" + "the ISO numbering convention (1=start week on Monday, 7=start week on Sunday).\n" + "Day numbers can start at 0 or 1 based on `DayOfWeekOptions.one_based_numbering`.\n" + "Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}, "DayOfWeekOptions"}; const FunctionDoc day_of_year_doc{ "Extract number of day of year", ("January 1st maps to day number 1, February 1st to 32, etc.\n" - "Returns an error if timezone is specified but not found. Null values return null."), + "Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc iso_year_doc{ "Extract ISO year number", ("First week of an ISO year has the majority (4 or more) of its days in January." - "Returns an error if timezone is specified but not found. Null values return null."), + "Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc iso_week_doc{ "Extract ISO week of year number", ("First ISO week has the majority (4 or more) of its days in January.\n" "Week of the year starts with 1 and can run up to 53.\n" - "Returns an error if timezone is specified but not found. Null values return null."), + "Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc iso_calendar_doc{ "Extract (ISO year, ISO week, ISO day of week) struct", ("ISO week starts on Monday denoted by 1 and ends on Sunday denoted by 7.\n" - "Returns an error if timezone is specified but not found. Null values return null."), + "Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc quarter_doc{ "Extract quarter of year number", ("First quarter maps to 1 and forth quarter maps to 4.\n" - "Returns an error if timezone is specified but not found. Null values return null."), + "Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc hour_doc{ "Extract hour value", - "Returns an error if timezone is specified but not found. Null values return null.", + ("Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc minute_doc{ "Extract minute values", - "Returns an error if timezone is specified but not found. Null values return null.", + ("Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc second_doc{ "Extract second values", - "Returns an error if timezone is specified but not found. Null values return null.", + ("Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc millisecond_doc{ "Extract millisecond values", ("Millisecond returns number of milliseconds since the last full second.\n" - "Returns an error if timezone is specified but not found. Null values return null."), + "Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc microsecond_doc{ "Extract microsecond values", ("Millisecond returns number of microseconds since the last full millisecond.\n" - "Returns an error if timezone is specified but not found. Null values return null."), + "Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc nanosecond_doc{ "Extract nanosecond values", ("Nanosecond returns number of nanoseconds since the last full microsecond.\n" - "Returns an error if timezone is specified but not found. Null values return null."), + "Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc subsecond_doc{ "Extract subsecond values", ("Subsecond returns the fraction of a second since the last full second.\n" - "Returns an error if timezone is specified but not found. Null values return null."), + "Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database."), {"values"}}; const FunctionDoc strftime_doc{ "Format timestamps according to a format string", ("For each input timestamp, emit a formatted string.\n" - "The time format string and locale can be set using StrftimeOptions. " - "Output precision of %S (seconds) flag depends on the input timestamp precision. " - "Timestamps with second precision are represented as integers while milliseconds, " - "microsecond and nanoseconds are represented as fixed floating point numbers with " - "3, 6 and 9 decimal places respectively. To obtain integer seconds, cast to " - "timestamp with second resolution.\n" - "Returns an error if timezone or locale are not found. Null values return null."), + "The time format string and locale can be set using StrftimeOptions.\n" + "The output precision of the \"%S\" (seconds) format code depends on\n" + "the input timestamp precision: it is an integer for timestamps with\n" + "second precision, a real number with the required number of fractional\n" + "digits for higher precisions.\n" + "Null values emit null.\n" + "An error is returned if the timestamps have a defined timezone but it\n" + "cannot be found in the timezone database, or if the specified locale\n" + "does not exist on this system."), {"timestamps"}, "StrftimeOptions"};