From e8ccb90247a43384997df2ea26ada2a3bcf1dd07 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 23 Oct 2017 16:53:18 -0700 Subject: [PATCH 1/2] SQL: Improved behavior when implicitly casting strings to date/time literals. - Handle all flavors of ISO8601 and SQL literals. - Throw errors on other literals instead of silently transforming them to 0. --- docs/content/misc/math-expr.md | 2 +- .../expression/TimestampParseExprMacro.java | 36 +++++++++++++++- .../druid/query/expression/ExprMacroTest.java | 7 +++- .../builtin/CastOperatorConversion.java | 4 +- .../sql/calcite/planner/DruidRexExecutor.java | 9 ++++ .../druid/sql/calcite/CalciteQueryTest.java | 42 +++++++++++++++++-- .../calcite/expression/ExpressionsTest.java | 4 +- 7 files changed, 93 insertions(+), 11 deletions(-) diff --git a/docs/content/misc/math-expr.md b/docs/content/misc/math-expr.md index f4d0db7f9e3e..07dcc5018082 100644 --- a/docs/content/misc/math-expr.md +++ b/docs/content/misc/math-expr.md @@ -63,7 +63,7 @@ The following built-in functions are available. |timestamp_floor|timestamp_floor(expr, period, \[origin, [timezone\]\]) rounds down a timestamp, returning it as a new timestamp. Period can be any ISO8601 period, like P3M (quarters) or PT12H (half-days). The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00".| |timestamp_shift|timestamp_shift(expr, period, step, \[timezone\]) shifts a timestamp by a period (step times), returning it as a new timestamp. Period can be any ISO8601 period. Step may be negative. The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00".| |timestamp_extract|timestamp_extract(expr, unit, \[timezone\]) extracts a time part from expr, returning it as a number. Unit can be EPOCH, SECOND, MINUTE, HOUR, DAY (day of month), DOW (day of week), DOY (day of year), WEEK (week of [week year](https://en.wikipedia.org/wiki/ISO_week_date)), MONTH (1 through 12), QUARTER (1 through 4), or YEAR. The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00"| -|timestamp_parse|timestamp_parse(string expr, \[pattern, [timezone\]\]) parses a string into a timestamp using a given [Joda DateTimeFormat pattern](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html), or ISO8601 if the pattern is not provided. The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00", and will be used as the time zone for strings that do not include a time zone offset. Pattern and time zone must be literals. Strings that cannot be parsed as timestamps will be returned as nulls.| +|timestamp_parse|timestamp_parse(string expr, \[pattern, [timezone\]\]) parses a string into a timestamp using a given [Joda DateTimeFormat pattern](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html). If the pattern is not provided, this parses time strings in either ISO8601 or SQL format. The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00", and will be used as the time zone for strings that do not include a time zone offset. Pattern and time zone must be literals. Strings that cannot be parsed as timestamps will be returned as nulls.| |timestamp_format|timestamp_format(expr, \[pattern, \[timezone\]\]) formats a timestamp as a string with a given [Joda DateTimeFormat pattern](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html), or ISO8601 if the pattern is not provided. The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00". Pattern and time zone must be literals.| ## Math functions diff --git a/processing/src/main/java/io/druid/query/expression/TimestampParseExprMacro.java b/processing/src/main/java/io/druid/query/expression/TimestampParseExprMacro.java index d885d8709a95..defcb56019dd 100644 --- a/processing/src/main/java/io/druid/query/expression/TimestampParseExprMacro.java +++ b/processing/src/main/java/io/druid/query/expression/TimestampParseExprMacro.java @@ -26,6 +26,10 @@ import io.druid.math.expr.ExprMacroTable; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.joda.time.format.DateTimeFormatterBuilder; +import org.joda.time.format.DateTimeParser; +import org.joda.time.format.ISODateTimeFormat; import javax.annotation.Nonnull; import java.util.List; @@ -57,7 +61,7 @@ public Expr apply(final List args) final DateTimes.UtcFormatter formatter = formatString == null - ? DateTimes.ISO_DATE_OR_TIME + ? createDefaultParser() : DateTimes.wrapFormatter(DateTimeFormat.forPattern(formatString).withZone(timeZone)); class TimestampParseExpr implements Expr @@ -91,4 +95,34 @@ public void visit(final Visitor visitor) return new TimestampParseExpr(); } + + /** + * Default formatter that parses according to the docs for this method: "If the pattern is not provided, this parses + * time strings in either ISO8601 or SQL format." + */ + private static DateTimes.UtcFormatter createDefaultParser() + { + final DateTimeFormatter offsetElement = new DateTimeFormatterBuilder() + .appendTimeZoneOffset("Z", true, 2, 4) + .toFormatter(); + + DateTimeParser timeOrOffset = new DateTimeFormatterBuilder() + .append( + null, + new DateTimeParser[]{ + new DateTimeFormatterBuilder().appendLiteral('T').toParser(), + new DateTimeFormatterBuilder().appendLiteral(' ').toParser() + } + ) + .appendOptional(ISODateTimeFormat.timeElementParser().getParser()) + .appendOptional(offsetElement.getParser()) + .toParser(); + + return DateTimes.wrapFormatter( + new DateTimeFormatterBuilder() + .append(ISODateTimeFormat.dateElementParser()) + .appendOptional(timeOrOffset) + .toFormatter() + ); + } } diff --git a/server/src/test/java/io/druid/query/expression/ExprMacroTest.java b/server/src/test/java/io/druid/query/expression/ExprMacroTest.java index e7bb2397fe2c..e1d39548d0e8 100644 --- a/server/src/test/java/io/druid/query/expression/ExprMacroTest.java +++ b/server/src/test/java/io/druid/query/expression/ExprMacroTest.java @@ -120,8 +120,13 @@ public void testTimestampExtract() public void testTimestampParse() { assertExpr("timestamp_parse(tstr)", DateTimes.of("2000-02-03T04:05:06").getMillis()); - assertExpr("timestamp_parse(tstr_sql)", null); + assertExpr("timestamp_parse(tstr_sql)", DateTimes.of("2000-02-03T04:05:06").getMillis()); + assertExpr("timestamp_parse('2000-02-03')", DateTimes.of("2000-02-03").getMillis()); + assertExpr("timestamp_parse('2000-02')", DateTimes.of("2000-02-01").getMillis()); + assertExpr("timestamp_parse('')", null); + assertExpr("timestamp_parse('z2000')", null); assertExpr("timestamp_parse(tstr_sql,'yyyy-MM-dd HH:mm:ss')", DateTimes.of("2000-02-03T04:05:06").getMillis()); + assertExpr("timestamp_parse('02/03/2000','MM/dd/yyyy')", DateTimes.of("2000-02-03").getMillis()); assertExpr( "timestamp_parse(tstr_sql,'yyyy-MM-dd HH:mm:ss','America/Los_Angeles')", DateTimes.of("2000-02-03T04:05:06-08:00").getMillis() diff --git a/sql/src/main/java/io/druid/sql/calcite/expression/builtin/CastOperatorConversion.java b/sql/src/main/java/io/druid/sql/calcite/expression/builtin/CastOperatorConversion.java index 1f34bde5082f..7cc7acaf4374 100644 --- a/sql/src/main/java/io/druid/sql/calcite/expression/builtin/CastOperatorConversion.java +++ b/sql/src/main/java/io/druid/sql/calcite/expression/builtin/CastOperatorConversion.java @@ -140,12 +140,12 @@ private static DruidExpression castCharToDateTime( final SqlTypeName toType ) { - // Cast strings to datetimes by parsin them from SQL format. + // Cast strings to datetimes by parsing them from SQL format. final DruidExpression timestampExpression = DruidExpression.fromFunctionCall( "timestamp_parse", ImmutableList.of( operand, - DruidExpression.fromExpression(DruidExpression.stringLiteral(dateTimeFormatString(toType))), + DruidExpression.fromExpression(DruidExpression.nullLiteral()), DruidExpression.fromExpression(DruidExpression.stringLiteral(plannerContext.getTimeZone().getID())) ) ); diff --git a/sql/src/main/java/io/druid/sql/calcite/planner/DruidRexExecutor.java b/sql/src/main/java/io/druid/sql/calcite/planner/DruidRexExecutor.java index 2eba93b58dea..1e187f78742b 100644 --- a/sql/src/main/java/io/druid/sql/calcite/planner/DruidRexExecutor.java +++ b/sql/src/main/java/io/druid/sql/calcite/planner/DruidRexExecutor.java @@ -20,6 +20,7 @@ package io.druid.sql.calcite.planner; import io.druid.java.util.common.DateTimes; +import io.druid.java.util.common.IAE; import io.druid.math.expr.Expr; import io.druid.math.expr.ExprEval; import io.druid.math.expr.ExprType; @@ -83,6 +84,10 @@ public void reduce( if (sqlTypeName == SqlTypeName.BOOLEAN) { literal = rexBuilder.makeLiteral(exprResult.asBoolean(), constExp.getType(), true); } else if (sqlTypeName == SqlTypeName.DATE) { + if (!constExp.getType().isNullable() && exprResult.isNull()) { + throw new IAE("Illegal DATE constant: %s", constExp); + } + literal = rexBuilder.makeDateLiteral( Calcites.jodaToCalciteDateString( DateTimes.utc(exprResult.asLong()), @@ -90,6 +95,10 @@ public void reduce( ) ); } else if (sqlTypeName == SqlTypeName.TIMESTAMP) { + if (!constExp.getType().isNullable() && exprResult.isNull()) { + throw new IAE("Illegal TIMESTAMP constant: %s", constExp); + } + literal = rexBuilder.makeTimestampLiteral( Calcites.jodaToCalciteTimestampString( DateTimes.utc(exprResult.asLong()), diff --git a/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java index c2ec6b2e1d42..0000921b7b76 100644 --- a/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java @@ -105,6 +105,7 @@ import io.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker; import io.druid.sql.calcite.view.InProcessViewManager; import org.apache.calcite.plan.RelOptPlanner; +import org.hamcrest.CoreMatchers; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.Interval; @@ -116,6 +117,8 @@ import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; +import org.junit.internal.matchers.ThrowableMessageMatcher; +import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; import java.util.ArrayList; @@ -219,6 +222,9 @@ public int getMaxQueryCount() private static final PagingSpec FIRST_PAGING_SPEC = new PagingSpec(null, 1000, true); + @Rule + public ExpectedException expectedException = ExpectedException.none(); + @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder(); @@ -2714,15 +2720,23 @@ public void testCountStarWithTimeFilter() throws Exception @Test public void testCountStarWithTimeFilterUsingStringLiterals() throws Exception { - // Strings are implicitly cast to timestamps. + // Strings are implicitly cast to timestamps. Test a few different forms. testQuery( - "SELECT COUNT(*) FROM druid.foo " - + "WHERE __time >= '2000-01-01 00:00:00' AND __time < '2001-01-01 00:00:00'", + "SELECT COUNT(*) FROM druid.foo\n" + + "WHERE __time >= '2000-01-01 00:00:00' AND __time < '2001-01-01T00:00:00'\n" + + "OR __time >= '2001-02-01' AND __time < '2001-02-02'\n" + + "OR __time BETWEEN '2001-03-01' AND '2001-03-02'", ImmutableList.of( Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) - .intervals(QSS(Intervals.of("2000-01-01/2001-01-01"))) + .intervals( + QSS( + Intervals.of("2000-01-01/2001-01-01"), + Intervals.of("2001-02-01/2001-02-02"), + Intervals.of("2001-03-01/2001-03-02T00:00:00.001") + ) + ) .granularity(Granularities.ALL) .aggregators(AGGS(new CountAggregatorFactory("a0"))) .context(TIMESERIES_CONTEXT_DEFAULT) @@ -2734,6 +2748,26 @@ public void testCountStarWithTimeFilterUsingStringLiterals() throws Exception ); } + @Test + public void testCountStarWithTimeFilterUsingStringLiteralsInvalid() throws Exception + { + // Strings are implicitly cast to timestamps. Test an invalid string. + + // This error message isn't ideal but it is at least better than silently ignoring the problem. + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("Error while applying rule ReduceExpressionsRule"); + expectedException.expectCause( + ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Illegal TIMESTAMP constant")) + ); + + testQuery( + "SELECT COUNT(*) FROM druid.foo\n" + + "WHERE __time >= 'z2000-01-01 00:00:00' AND __time < '2001-01-01 00:00:00'\n", + ImmutableList.of(), + ImmutableList.of() + ); + } + @Test public void testCountStarWithSinglePointInTime() throws Exception { diff --git a/sql/src/test/java/io/druid/sql/calcite/expression/ExpressionsTest.java b/sql/src/test/java/io/druid/sql/calcite/expression/ExpressionsTest.java index 1ef16e824afe..fe0d54962125 100644 --- a/sql/src/test/java/io/druid/sql/calcite/expression/ExpressionsTest.java +++ b/sql/src/test/java/io/druid/sql/calcite/expression/ExpressionsTest.java @@ -724,7 +724,7 @@ public void testCastAsTimestamp() ), DruidExpression.of( null, - "timestamp_parse(\"tstr\",'yyyy-MM-dd HH:mm:ss','UTC')" + "timestamp_parse(\"tstr\",'','UTC')" ), DateTimes.of("2000-02-03T04:05:06Z").getMillis() ); @@ -784,7 +784,7 @@ public void testCastAsDate() inputRef("dstr") ), DruidExpression.fromExpression( - "timestamp_floor(timestamp_parse(\"dstr\",'yyyy-MM-dd','UTC'),'P1D','','UTC')" + "timestamp_floor(timestamp_parse(\"dstr\",'','UTC'),'P1D','','UTC')" ), DateTimes.of("2000-02-03").getMillis() ); From 1ff6c4d3313fd48cee31f6862127713a834c5106 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 8 Nov 2017 01:10:17 -0800 Subject: [PATCH 2/2] Respect timeZone when format is null. --- .../io/druid/query/expression/TimestampParseExprMacro.java | 5 +++-- .../test/java/io/druid/query/expression/ExprMacroTest.java | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/io/druid/query/expression/TimestampParseExprMacro.java b/processing/src/main/java/io/druid/query/expression/TimestampParseExprMacro.java index defcb56019dd..b163aac140cc 100644 --- a/processing/src/main/java/io/druid/query/expression/TimestampParseExprMacro.java +++ b/processing/src/main/java/io/druid/query/expression/TimestampParseExprMacro.java @@ -61,7 +61,7 @@ public Expr apply(final List args) final DateTimes.UtcFormatter formatter = formatString == null - ? createDefaultParser() + ? createDefaultParser(timeZone) : DateTimes.wrapFormatter(DateTimeFormat.forPattern(formatString).withZone(timeZone)); class TimestampParseExpr implements Expr @@ -100,7 +100,7 @@ public void visit(final Visitor visitor) * Default formatter that parses according to the docs for this method: "If the pattern is not provided, this parses * time strings in either ISO8601 or SQL format." */ - private static DateTimes.UtcFormatter createDefaultParser() + private static DateTimes.UtcFormatter createDefaultParser(final DateTimeZone timeZone) { final DateTimeFormatter offsetElement = new DateTimeFormatterBuilder() .appendTimeZoneOffset("Z", true, 2, 4) @@ -123,6 +123,7 @@ private static DateTimes.UtcFormatter createDefaultParser() .append(ISODateTimeFormat.dateElementParser()) .appendOptional(timeOrOffset) .toFormatter() + .withZone(timeZone) ); } } diff --git a/server/src/test/java/io/druid/query/expression/ExprMacroTest.java b/server/src/test/java/io/druid/query/expression/ExprMacroTest.java index e1d39548d0e8..5dc3135ed965 100644 --- a/server/src/test/java/io/druid/query/expression/ExprMacroTest.java +++ b/server/src/test/java/io/druid/query/expression/ExprMacroTest.java @@ -121,6 +121,10 @@ public void testTimestampParse() { assertExpr("timestamp_parse(tstr)", DateTimes.of("2000-02-03T04:05:06").getMillis()); assertExpr("timestamp_parse(tstr_sql)", DateTimes.of("2000-02-03T04:05:06").getMillis()); + assertExpr( + "timestamp_parse(tstr_sql,'','America/Los_Angeles')", + DateTimes.of("2000-02-03T04:05:06-08:00").getMillis() + ); assertExpr("timestamp_parse('2000-02-03')", DateTimes.of("2000-02-03").getMillis()); assertExpr("timestamp_parse('2000-02')", DateTimes.of("2000-02-01").getMillis()); assertExpr("timestamp_parse('')", null);