From 4b6884b4b53ac7b8beac6d025916ed6b11d7b55c Mon Sep 17 00:00:00 2001 From: Martin Hilton Date: Thu, 31 Oct 2024 16:27:10 +0000 Subject: [PATCH 1/2] fix: date_bin() on timstamps before 1970 The date_bin() function was not working correctly for timestamps before 1970. Specifically if the input timestamp was the exact time of the start of a bin then it would be placed in the previous bin. The % operator has a negative result when the dividend is negative. This causes the date_bin calculation to round up to the next bin. To compensate the size of 1 interval is subtracted from the result if the input is negative. This subtraction is no longer performed if the input is already the exact time of the start of a bin. --- datafusion/functions/src/datetime/date_bin.rs | 30 ++++++++++++++++++- .../sqllogictest/test_files/timestamps.slt | 17 +++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index e335c4e097f78..f6106b0d3d020 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -240,7 +240,7 @@ fn date_bin_nanos_interval(stride_nanos: i64, source: i64, origin: i64) -> i64 { fn compute_distance(time_diff: i64, stride: i64) -> i64 { let time_delta = time_diff - (time_diff % stride); - if time_diff < 0 && stride > 1 { + if time_diff < 0 && stride > 1 && time_delta != time_diff { // The origin is later than the source timestamp, round down to the previous bin time_delta - stride } else { @@ -864,4 +864,32 @@ mod tests { assert_eq!(result, expected1, "{source} = {expected}"); }) } + + #[test] + fn test_date_bin_before_epoch() { + let cases = vec![ + ( + (TimeDelta::try_minutes(15), "1969-12-31T23:44:59.999999999"), + "1969-12-31T23:30:00", + ), + ( + (TimeDelta::try_minutes(15), "1969-12-31T23:45:00"), + "1969-12-31T23:45:00", + ), + ( + (TimeDelta::try_minutes(15), "1969-12-31T23:45:00.000000001"), + "1969-12-31T23:45:00", + ), + ]; + + cases.iter().for_each(|((stride, source), expected)| { + let stride = stride.unwrap(); + let stride1 = stride.num_nanoseconds().unwrap(); + let source1 = string_to_timestamp_nanos(source).unwrap(); + + let expected1 = string_to_timestamp_nanos(expected).unwrap(); + let result = date_bin_nanos_interval(stride1, source1, 0); + assert_eq!(result, expected1, "{source} = {expected}"); + }) + } } diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 38c2a66472731..a09a63a791fce 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -980,6 +980,23 @@ SELECT DATE_BIN('3 years 1 months', '2022-09-01 00:00:00Z'); ---- 2022-06-01T00:00:00 +# Times before the unix epoch +query P +select date_bin('1 hour', column1) +from (values + (timestamp '1969-01-01 00:00:00'), + (timestamp '1969-01-01 00:15:00'), + (timestamp '1969-01-01 00:30:00'), + (timestamp '1969-01-01 00:45:00'), + (timestamp '1969-01-01 01:00:00') +) as sq +---- +1969-01-01T00:00:00 +1969-01-01T00:00:00 +1969-01-01T00:00:00 +1969-01-01T00:00:00 +1969-01-01T01:00:00 + ### ## test date_trunc function ### From 78853100d60c14b3cf70093198c43239e9710306 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 31 Oct 2024 15:16:21 -0400 Subject: [PATCH 2/2] fix clippy --- datafusion/functions/src/datetime/date_bin.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index f6106b0d3d020..e8d065df86333 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -867,7 +867,7 @@ mod tests { #[test] fn test_date_bin_before_epoch() { - let cases = vec![ + let cases = [ ( (TimeDelta::try_minutes(15), "1969-12-31T23:44:59.999999999"), "1969-12-31T23:30:00",