Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 44 additions & 6 deletions rust/datafusion/src/physical_plan/datetime_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use arrow::{
buffer::Buffer,
datatypes::{DataType, TimeUnit, ToByteSlice},
};
use chrono::prelude::*;
use chrono::{prelude::*, LocalResult};

#[inline]
/// Accepts a string in RFC3339 / ISO8601 standard format and some
Expand Down Expand Up @@ -107,13 +107,13 @@ fn string_to_timestamp_nanos(s: &str) -> Result<i64> {
// without a timezone specifier as a local time, using T as a separator
// Example: 2020-09-08T13:42:29.190855
if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S.%f") {
return Ok(ts.timestamp_nanos());
return naive_datetime_to_timestamp(s, ts);
}

// without a timezone specifier as a local time, using ' ' as a separator
// Example: 2020-09-08 13:42:29.190855
if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S.%f") {
return Ok(ts.timestamp_nanos());
return naive_datetime_to_timestamp(s, ts);
}

// Note we don't pass along the error message from the underlying
Expand All @@ -127,6 +127,30 @@ fn string_to_timestamp_nanos(s: &str) -> Result<i64> {
)))
}

/// Converts the naive datetime (which has no specific timezone) to a
/// nanosecond epoch timestamp relative to UTC.
fn naive_datetime_to_timestamp(s: &str, datetime: NaiveDateTime) -> Result<i64> {
let l = Local {};

match l.from_local_datetime(&datetime) {
LocalResult::None => Err(ExecutionError::General(format!(
"Error parsing '{}' as timestamp: local time representation is invalid",
s
))),
LocalResult::Single(local_datetime) => {
Ok(local_datetime.with_timezone(&Utc).timestamp_nanos())
}
// Ambiguous times can happen if the timestamp is exactly when
// a daylight savings time transition occurs, for example, and
// so the datetime could validly be said to be in two
// potential offsets. However, since we are about to convert
// to UTC anyways, we can pick one arbitrarily
LocalResult::Ambiguous(local_datetime, _) => {
Ok(local_datetime.with_timezone(&Utc).timestamp_nanos())
}
}
}

/// convert an array of strings into `Timestamp(Nanosecond, None)`
pub fn to_timestamp(args: &[ArrayRef]) -> Result<TimestampNanosecondArray> {
let num_rows = args[0].len();
Expand Down Expand Up @@ -220,11 +244,25 @@ mod tests {

#[test]
fn string_to_timestamp_no_timezone() -> Result<()> {
let expected_date_time = NaiveDateTime::new(
// This test is designed to succeed in regardless of the local
// timezone the test machine is running. Thus it is still
// somewhat suceptable to bugs in the use of chrono
let naive_date_time = NaiveDateTime::new(
NaiveDate::from_ymd(2020, 09, 08),
NaiveTime::from_hms_nano(13, 42, 29, 190855),
)
.timestamp_nanos();
);

// Note: Use chrono APIs that are different than
// naive_datetime_to_timestamp to compute the utc offset to
// try and double check the logic
let utc_offset_secs = match Local.offset_from_local_datetime(&naive_date_time) {
LocalResult::Single(local_offset) => {
local_offset.fix().local_minus_utc() as i64
}
_ => panic!("Unexpected failure converting to local datetime"),
};
let utc_offset_nanos = utc_offset_secs * 1_000_000_000;
let expected_date_time = naive_date_time.timestamp_nanos() - utc_offset_nanos;

// Ensure both T and ' ' variants work
assert_eq!(
Expand Down