From d4d57716729f4b15ae437bf6c9e9447bb69eaa9d Mon Sep 17 00:00:00 2001 From: alamb Date: Thu, 10 Sep 2020 07:19:13 -0400 Subject: [PATCH] ARROW-9961: [Rust][DataFusion] Make to_timestamp function parses timestamp without timezone offset as local --- .../src/physical_plan/datetime_expressions.rs | 50 ++++++++++++++++--- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/rust/datafusion/src/physical_plan/datetime_expressions.rs b/rust/datafusion/src/physical_plan/datetime_expressions.rs index 998cddc6b9db..a031f16e434a 100644 --- a/rust/datafusion/src/physical_plan/datetime_expressions.rs +++ b/rust/datafusion/src/physical_plan/datetime_expressions.rs @@ -25,7 +25,7 @@ use arrow::{ buffer::Buffer, datatypes::{DataType, TimeUnit, ToByteSlice}, }; -use chrono::prelude::*; +use chrono::{prelude::*, LocalResult}; #[inline] /// Accepts a string in RFC3339 / ISO8601 standard format and some @@ -107,13 +107,13 @@ fn string_to_timestamp_nanos(s: &str) -> Result { // without a timezone specifier as a local time, using T as a separator // Example: 2020-09-08T13:42:29.190855 if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S.%f") { - return Ok(ts.timestamp_nanos()); + return naive_datetime_to_timestamp(s, ts); } // without a timezone specifier as a local time, using ' ' as a separator // Example: 2020-09-08 13:42:29.190855 if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S.%f") { - return Ok(ts.timestamp_nanos()); + return naive_datetime_to_timestamp(s, ts); } // Note we don't pass along the error message from the underlying @@ -127,6 +127,30 @@ fn string_to_timestamp_nanos(s: &str) -> Result { ))) } +/// Converts the naive datetime (which has no specific timezone) to a +/// nanosecond epoch timestamp relative to UTC. +fn naive_datetime_to_timestamp(s: &str, datetime: NaiveDateTime) -> Result { + let l = Local {}; + + match l.from_local_datetime(&datetime) { + LocalResult::None => Err(ExecutionError::General(format!( + "Error parsing '{}' as timestamp: local time representation is invalid", + s + ))), + LocalResult::Single(local_datetime) => { + Ok(local_datetime.with_timezone(&Utc).timestamp_nanos()) + } + // Ambiguous times can happen if the timestamp is exactly when + // a daylight savings time transition occurs, for example, and + // so the datetime could validly be said to be in two + // potential offsets. However, since we are about to convert + // to UTC anyways, we can pick one arbitrarily + LocalResult::Ambiguous(local_datetime, _) => { + Ok(local_datetime.with_timezone(&Utc).timestamp_nanos()) + } + } +} + /// convert an array of strings into `Timestamp(Nanosecond, None)` pub fn to_timestamp(args: &[ArrayRef]) -> Result { let num_rows = args[0].len(); @@ -220,11 +244,25 @@ mod tests { #[test] fn string_to_timestamp_no_timezone() -> Result<()> { - let expected_date_time = NaiveDateTime::new( + // This test is designed to succeed in regardless of the local + // timezone the test machine is running. Thus it is still + // somewhat suceptable to bugs in the use of chrono + let naive_date_time = NaiveDateTime::new( NaiveDate::from_ymd(2020, 09, 08), NaiveTime::from_hms_nano(13, 42, 29, 190855), - ) - .timestamp_nanos(); + ); + + // Note: Use chrono APIs that are different than + // naive_datetime_to_timestamp to compute the utc offset to + // try and double check the logic + let utc_offset_secs = match Local.offset_from_local_datetime(&naive_date_time) { + LocalResult::Single(local_offset) => { + local_offset.fix().local_minus_utc() as i64 + } + _ => panic!("Unexpected failure converting to local datetime"), + }; + let utc_offset_nanos = utc_offset_secs * 1_000_000_000; + let expected_date_time = naive_date_time.timestamp_nanos() - utc_offset_nanos; // Ensure both T and ' ' variants work assert_eq!(