From af6bc0f65c8e264fa849896437e87580edc6e364 Mon Sep 17 00:00:00 2001 From: caiconghui1 Date: Tue, 25 Jan 2022 10:45:33 +0800 Subject: [PATCH 1/2] [improvement](rewrite) Make RewriteDateLiteralRule to be compatible with mysql --- be/src/runtime/datetime_value.cpp | 4 +- .../org/apache/doris/analysis/Analyzer.java | 4 +- .../apache/doris/rewrite/ExprRewriter.java | 2 +- .../doris/rewrite/RewriteDateLiteralRule.java | 198 ++++++++++++++++++ ...fyInvalidDateBinaryPredicatesDateRule.java | 81 ------- .../doris/rewrite/InferFiltersRuleTest.java | 9 +- .../rewrite/RewriteDateLiteralRuleTest.java | 164 +++++++++++++++ 7 files changed, 368 insertions(+), 94 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteDateLiteralRule.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java create mode 100644 fe/fe-core/src/test/java/org/apache/doris/rewrite/RewriteDateLiteralRuleTest.java diff --git a/be/src/runtime/datetime_value.cpp b/be/src/runtime/datetime_value.cpp index b545f5512eb37c..b7202916d5596e 100644 --- a/be/src/runtime/datetime_value.cpp +++ b/be/src/runtime/datetime_value.cpp @@ -102,7 +102,7 @@ bool DateTimeValue::from_date_str(const char* date_str, int len) { int digits = pos - ptr; bool is_interval_format = false; - // Compatible with MySQL. Shit!!! + // Compatible with MySQL. 
// For YYYYMMDD/YYYYMMDDHHMMSS is 4 digits years if (pos == end || *pos == '.') { if (digits == 4 || digits == 8 || digits >= 14) { @@ -122,7 +122,7 @@ bool DateTimeValue::from_date_str(const char* date_str, int len) { while (ptr < end && isdigit(*ptr) && (scan_to_delim || field_len--)) { temp_val = temp_val * 10 + (*ptr++ - '0'); } - // Imposible + // Impossible if (temp_val > 999999L) { return false; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java index b59c22c9f47103..2ef0c7f9928a0c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java @@ -49,7 +49,7 @@ import org.apache.doris.rewrite.RewriteEncryptKeyRule; import org.apache.doris.rewrite.RewriteFromUnixTimeRule; import org.apache.doris.rewrite.RewriteLikePredicateRule; -import org.apache.doris.rewrite.SimplifyInvalidDateBinaryPredicatesDateRule; +import org.apache.doris.rewrite.RewriteDateLiteralRule; import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmap; import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmapOrHLLRule; import org.apache.doris.rewrite.mvrewrite.CountFieldToSum; @@ -306,7 +306,7 @@ public GlobalState(Catalog catalog, ConnectContext context) { rules.add(FoldConstantsRule.INSTANCE); rules.add(RewriteFromUnixTimeRule.INSTANCE); rules.add(CompoundPredicateWriteRule.INSTANCE); - rules.add(SimplifyInvalidDateBinaryPredicatesDateRule.INSTANCE); + rules.add(RewriteDateLiteralRule.INSTANCE); rules.add(RewriteEncryptKeyRule.INSTANCE); rules.add(RewriteAliasFunctionRule.INSTANCE); rules.add(RewriteLikePredicateRule.INSTANCE); diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java index 7fbf56322affb5..eb211807bc599a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java @@ -48,7 +48,7 @@ public class ExprRewriter { private final List rules_; // The type of clause that executes the rule. - // This type is only used in InferFiltersRule, other rules are not used + // This type is only used in InferFiltersRule, RewriteDateLiteralRule, other rules are not used public enum ClauseType { ON_CLAUSE, WHERE_CLAUSE, diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteDateLiteralRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteDateLiteralRule.java new file mode 100644 index 00000000000000..9b08224c21e3e8 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteDateLiteralRule.java @@ -0,0 +1,198 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.rewrite; + +import org.apache.doris.analysis.Analyzer; +import org.apache.doris.analysis.BinaryPredicate; +import org.apache.doris.analysis.CastExpr; +import org.apache.doris.analysis.DateLiteral; +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.LiteralExpr; +import org.apache.doris.analysis.NullLiteral; +import org.apache.doris.common.AnalysisException; + +/** + * this rule try to convert date expression, if date is invalid, it will be + * converted into null literal to avoid scanning all partitions + * if a date data is invalid, Doris will try to cast it as datetime firstly, + * only support rewriting pattern: slot + operator + date literal + * Examples: + * date = "2020-10-32" will throw analysis exception when in on clause or where clause, + * and be converted to be NULL when in other clause + */ +public class RewriteDateLiteralRule implements ExprRewriteRule { + public final static ExprRewriteRule INSTANCE = new RewriteDateLiteralRule(); + private final static int ALLOW_SPACE_MASK = 4 | 64; + private final static int MAX_DATE_PARTS = 8; + private final static int YY_PART_YEAR = 70; + private final static int[] DAYS_IN_MONTH = new int[] {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + + @Override + public Expr apply(Expr expr, Analyzer analyzer, ExprRewriter.ClauseType clauseType) throws AnalysisException { + if (!(expr instanceof BinaryPredicate)) return expr; + Expr lchild = expr.getChild(0); + if (!lchild.getType().isDateType()) { + return expr; + } + Expr valueExpr = expr.getChild(1); + if (!valueExpr.getType().isDateType()) { + return expr; + } + if (!valueExpr.isConstant()) { + return expr; + } + // Only consider CastExpr and try our best to convert non-date_literal to date_literal,to be compatible with MySQL + if (valueExpr instanceof CastExpr) { + Expr childExpr = valueExpr.getChild(0); + if (childExpr instanceof LiteralExpr) { + try { + String dateStr = childExpr.getStringValue(); + 
expr.setChild(1, getDateLiteralfromDateStr(dateStr)); + } catch (AnalysisException e) { + if (clauseType == ExprRewriter.ClauseType.OTHER_CLAUSE) { + return new NullLiteral(); + } else { + throw new AnalysisException("Incorrect datetime value: " + valueExpr.toSql() + " in expression: " + expr.toSql()); + } + } + } + } + return expr; + } + + // The interval format is that with no delimiters + // YYYY-MM-DD HH-MM-DD.FFFFFF AM in default format, and now doris will skip part 6, 7 + // 0 1 2 3 4 5 6 7 + private DateLiteral getDateLiteralfromDateStr(String dateStr) throws AnalysisException { + dateStr = dateStr.trim(); + if (dateStr.isEmpty()) { + throw new AnalysisException("parse datetime value failed: " + dateStr); + } + int[] dateVal = new int[MAX_DATE_PARTS]; + int[] dateLen = new int[MAX_DATE_PARTS]; + + // Fix year length + int pre = 0; + int pos = 0; + while (pos < dateStr.length() && (Character.isDigit(dateStr.charAt(pos)) || dateStr.charAt(pos) == 'T')) { + pos++; + } + int yearLen = 4; + int digits = pos - pre; + boolean isIntervalFormat = false; + // For YYYYMMDD/YYYYMMDDHHMMSS is 4 digits years + if (pos == dateStr.length() || dateStr.charAt(pos) == '.') { + if (digits == 4 || digits == 8 || digits >= 14) { + yearLen = 4; + } else { + yearLen = 2; + } + isIntervalFormat = true; + } + int fieldIdx = 0; + int fieldLen = yearLen; + while (pre < dateStr.length() && Character.isDigit(dateStr.charAt(pre)) && fieldIdx < MAX_DATE_PARTS - 1) { + int start = pre; + int temp_val = 0; + boolean scanToDelim = (!isIntervalFormat) && (fieldIdx != 6); + while (pre < dateStr.length() && Character.isDigit(dateStr.charAt(pre)) && (scanToDelim || fieldLen-- != 0)) { + temp_val = temp_val * 10 + (dateStr.charAt(pre++) - '0'); + } + dateVal[fieldIdx] = temp_val; + dateLen[fieldIdx] = pre - start; + fieldLen = 2; + + if (pre == dateStr.length()) { + fieldIdx++; + break; + } + + if (fieldIdx == 2 && dateStr.charAt(pre) == 'T') { + // YYYYMMDDTHHMMDD, skip 'T' and continue + pre++; 
+ fieldIdx++; + continue; + } + + // Second part + if (fieldIdx == 5) { + if (dateStr.charAt(pre) == '.') { + pre++; + fieldLen = 6; + } else if (Character.isDigit(dateStr.charAt(pre))) { + fieldIdx++; + break; + } + fieldIdx++; + continue; + } + // escape separator + while (pre < dateStr.length() && (Character.toString(dateStr.charAt(pre)).matches("\\p{Punct}")) + || Character.isSpaceChar(dateStr.charAt(pre))) { + if (Character.isSpaceChar(dateStr.charAt(pre))) { + if (((1 << fieldIdx) & ALLOW_SPACE_MASK) == 0) { + throw new AnalysisException("parse datetime value failed: " + dateStr); + } + } + pre++; + } + fieldIdx++; + } + int numField = fieldIdx; + if (!isIntervalFormat) { + yearLen = dateLen[0]; + } + for (; fieldIdx < MAX_DATE_PARTS; ++fieldIdx) { + dateLen[fieldIdx] = 0; + dateVal[fieldIdx] = 0; + } + if (yearLen == 2) { + if (dateVal[0] < YY_PART_YEAR) { + dateVal[0] += 2000; + } else { + dateVal[0] += 1900; + } + } + + if (numField < 3) { + throw new AnalysisException("parse datetime value failed: " + dateStr); + } + return getValidDataLiteral(dateVal[0], dateVal[1], dateVal[2], dateVal[3], dateVal[4], dateVal[5]); + } + + private boolean isLeapYear(int year) { + return ((year % 4) == 0) && ((year % 100 != 0) || ((year % 400) == 0)); + } + + private Boolean CheckDateOutOfRange(int year, int month, int day) { + if (month != 0 && month <= 12 && day > DAYS_IN_MONTH[month]) { + // Feb 29 in leap year is valid. 
+ if (!(month == 2 && day == 29 && isLeapYear(year))) return true; + } + return year > 9999 || month > 12 || day > 31; + } + + private DateLiteral getValidDataLiteral(int year, int month, int day, int hour, int minute, int second) throws AnalysisException { + boolean timeOutOfRange = hour > 23 || minute > 59 || second > 59; + if (timeOutOfRange || CheckDateOutOfRange(year, month, day)) { + throw new AnalysisException("Datetime value is out of range: " + + String.format("%s-%s-%s %s:%s:%s", year, month, day, hour, minute, second)); + } + return new DateLiteral(year, month, day, hour, minute, second); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java deleted file mode 100644 index 4c16a4b3edc4c8..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java +++ /dev/null @@ -1,81 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.rewrite; - -import org.apache.doris.analysis.Analyzer; -import org.apache.doris.analysis.BinaryPredicate; -import org.apache.doris.analysis.CastExpr; -import org.apache.doris.analysis.Expr; -import org.apache.doris.analysis.NullLiteral; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.NotLiteralExprPredicate; - -/** - * this rule try to convert date expression, if date is invalid, it will be - * converted into null literal to avoid to scan all partitions - * if a date data is invalid or contains nanosecond, it will be convert into CastExpr - * only support rewriting pattern: slot + operator + date literal - * Examples: - * date = "2020-10-32" => NULL - */ -public class SimplifyInvalidDateBinaryPredicatesDateRule implements ExprRewriteRule { - public static ExprRewriteRule INSTANCE = new SimplifyInvalidDateBinaryPredicatesDateRule(); - public static final int DATETIME_STRING_MAX_LENGTH = new String("yyyy-MM-dd HH:ii:ss").length(); - private static final NotLiteralExprPredicate NOT_LITERAL_EXPR_PREDICATE = new NotLiteralExprPredicate(); - - @Override - public Expr apply(Expr expr, Analyzer analyzer, ExprRewriter.ClauseType clauseType) throws AnalysisException { - if (!(expr instanceof BinaryPredicate)) return expr; - Expr lchild = expr.getChild(0); - if (!lchild.getType().isDateType()) { - return expr; - } - Expr valueExpr = expr.getChild(1); - if (!valueExpr.getType().isDateType()) { - return expr; - } - if (!valueExpr.isConstant()) { - return expr; - } - - // This is not a very good implementation and tricky. - // We have to handle the following cases: - // A. k1 is datetime, sql with "k1 > to_date(now())" will be converted to k1 > cast(to_date("xxxx-xx-xx")) - // B. k1 is datetime, sql with "k1 > '2021-10-32 10:00:00.100010'" will be converted to k1 > cast('2021-10-32 10:00:00.100010' as datetime) - // C. 
k1 is datetime, sql with "k1 > '2021-10-32'" will be converted to k1 > cast('2021-10-32' as datetime), and finally to converted to NullLiteral. - if (valueExpr instanceof CastExpr) { - valueExpr = valueExpr.getChild(0); - if (valueExpr.contains(NOT_LITERAL_EXPR_PREDICATE)) { - // Case A. - return expr; - } - String dateStr = valueExpr.toSql(); - if (dateStr.length() > DATETIME_STRING_MAX_LENGTH && dateStr.contains(".")) { - // Case B - return expr; - } - // Case C - return new NullLiteral(); - } else { - if (valueExpr.contains(NOT_LITERAL_EXPR_PREDICATE)) { - return expr; - } - } - return expr; - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/rewrite/InferFiltersRuleTest.java b/fe/fe-core/src/test/java/org/apache/doris/rewrite/InferFiltersRuleTest.java index 9149a64824e551..15234fb5c39f09 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/rewrite/InferFiltersRuleTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/rewrite/InferFiltersRuleTest.java @@ -18,8 +18,6 @@ package org.apache.doris.rewrite; -import org.apache.commons.lang3.StringUtils; -import org.apache.doris.common.AnalysisException; import org.apache.doris.common.FeConstants; import org.apache.doris.qe.SessionVariable; @@ -32,17 +30,12 @@ import org.junit.BeforeClass; import org.junit.Test; -import java.util.Map; import java.util.UUID; - - public class InferFiltersRuleTest { - - private static final Logger LOG = LogManager.getLogger(InferFiltersRuleTest.class); private static String baseDir = "fe"; private static String runningDir = baseDir + "/mocked/InferFiltersRuleTest/" - + UUID.randomUUID().toString() + "/"; + + UUID.randomUUID() + "/"; private static DorisAssert dorisAssert; private static final String DB_NAME = "db1"; private static final String TABLE_NAME_1 = "tb1"; diff --git a/fe/fe-core/src/test/java/org/apache/doris/rewrite/RewriteDateLiteralRuleTest.java b/fe/fe-core/src/test/java/org/apache/doris/rewrite/RewriteDateLiteralRuleTest.java new file mode 100644 index 
00000000000000..86001047a134a2 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/rewrite/RewriteDateLiteralRuleTest.java @@ -0,0 +1,164 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// + +package org.apache.doris.rewrite; + +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.FeConstants; + +import org.apache.doris.utframe.DorisAssert; +import org.apache.doris.utframe.UtFrameUtils; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.UUID; + +public class RewriteDateLiteralRuleTest { + private static String baseDir = "fe"; + private static String runningDir = baseDir + "/mocked/RewriteDateLiteralRuleTest/" + + UUID.randomUUID() + "/"; + private static DorisAssert dorisAssert; + private static final String DB_NAME = "db1"; + private static final String TABLE_NAME_1 = "tb1"; + + @BeforeClass + public static void beforeClass() throws Exception { + FeConstants.default_scheduler_interval_millisecond = 10; + FeConstants.runningUnitTest = true; + UtFrameUtils.createDorisCluster(runningDir); + dorisAssert = new DorisAssert(); + dorisAssert.withDatabase(DB_NAME).useDatabase(DB_NAME); + String 
createTableSQL = "create table " + DB_NAME + "." + TABLE_NAME_1 + + " (k1 datetime, k2 int) " + + "distributed by hash(k2) buckets 3 properties('replication_num' = '1');"; + dorisAssert.withTable(createTableSQL); + } + + @AfterClass + public static void afterClass() throws Exception { + UtFrameUtils.cleanDorisFeDir(baseDir); + } + + @Test + public void testWithIntFormatDate() throws Exception { + String query = "select * from db1.tb1 where k1 > 20210301"; + String planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '2021-03-01 00:00:00'")); + query = "select k1 > 20210301 from db1.tb1"; + planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '2021-03-01 00:00:00'")); + query = "select k1 > 20210301223344 from db1.tb1"; + planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '2021-03-01 22:33:44'")); + } + + @Test + public void testWithStringFormatDate() throws Exception { + String query = "select * from db1.tb1 where k1 > '2021030112334455'"; + String planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '2021-03-01 12:33:44'")); + + query = "select k1 > '20210301' from db1.tb1"; + planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '2021-03-01 00:00:00'")); + + query = "select k1 > '20210301233234.34' from db1.tb1"; + planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '2021-03-01 23:32:34'")); + + query = "select * from db1.tb1 where k1 > '2021-03-01'"; + planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '2021-03-01 00:00:00'")); + + query = "select k1 > '2021-03-01 11:22:33' from db1.tb1"; + planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '2021-03-01 11:22:33'")); + + 
query = "select k1 > '2021-03-01 16:22:33' from db1.tb1"; + planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '2021-03-01 16:22:33'")); + + query = "select k1 > '2021-03-01 11:22' from db1.tb1"; + planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '2021-03-01 11:22:00'")); + + query = "select k1 > '20210301T221133' from db1.tb1"; + planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '2021-03-01 22:11:33'")); + + query = "select k1 > '2021-03-01dd 11:22' from db1.tb1"; + planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '2021-03-01 00:00:00'")); + + query = "select k1 > '80-03-01 11:22' from db1.tb1"; + planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '1980-03-01 11:22:00'")); + + query = "select k1 > '12-03-01 11:22' from db1.tb1"; + planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > '2012-03-01 11:22:00'")); + } + + @Test + public void testWithDoubleFormatDate() throws Exception { + String query = "select * from db1.tb1 where k1 > 20210301.22"; + String planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > 2.021030122E7")); + + query = "select k1 > 20210331.22 from db1.tb1"; + planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`k1` > 2.021033122E7")); + } + + @Test + public void testWithInvalidFormatDate() throws Exception { + String query = "select * from db1.tb1 where k1 > '2021030125334455'"; + try { + dorisAssert.query(query).explainQuery(); + } catch (AnalysisException e) { + Assert.assertTrue(e.getMessage().contains( + "Incorrect datetime value: '2021030125334455' in expression: `k1` > '2021030125334455'")); + } + + query = "select k1 > '2021030125334455' from 
db1.tb1"; + String plainString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(plainString.contains("NULL")); + + query = "select * from db1.tb1 where k1 > '2021-03-32 23:33:55'"; + try { + dorisAssert.query(query).explainQuery(); + } catch (AnalysisException e) { + Assert.assertTrue(e.getMessage().contains( + "Incorrect datetime value: '2021-03-32 23:33:55' in expression: `k1` > '2021-03-32 23:33:55'")); + } + + query = "select * from db1.tb1 where k1 > '2021-03- 03 23:33:55'"; + try { + dorisAssert.query(query).explainQuery(); + } catch (AnalysisException e) { + Assert.assertTrue(e.getMessage().contains( + "Incorrect datetime value: '2021-03- 03 23:33:55' in expression: `k1` > '2021-03- 03 23:33:55'")); + } + + query = "select k1 > '2021-03- 03 23:33:55' from db1.tb1"; + plainString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(plainString.contains("NULL")); + } +} \ No newline at end of file From ee4dabca988c76392b3ad03cd135d21c19637640 Mon Sep 17 00:00:00 2001 From: caiconghui1 Date: Tue, 25 Jan 2022 15:44:20 +0800 Subject: [PATCH 2/2] fix by review --- .../apache/doris/analysis/DateLiteral.java | 132 +++++++++++++++++- .../doris/rewrite/RewriteDateLiteralRule.java | 129 +---------------- 2 files changed, 128 insertions(+), 133 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java index a939aadba1273d..15f04285473d1d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java @@ -29,7 +29,6 @@ import org.apache.doris.thrift.TExprNodeType; import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.logging.log4j.LogManager; @@ -41,7 +40,6 @@ import java.nio.ByteBuffer; import java.time.Year; import java.util.Date; -import java.util.List; import 
java.util.Map; import java.util.Objects; import java.util.TimeZone; @@ -89,8 +87,10 @@ public class DateLiteral extends LiteralExpr { private static Map MONTH_NAME_DICT = Maps.newHashMap(); private static Map MONTH_ABBR_NAME_DICT = Maps.newHashMap(); private static Map WEEK_DAY_NAME_DICT = Maps.newHashMap(); - private static Map WEEK_DAY_ABBR_NAME_DICT = Maps.newHashMap(); - private static List DAYS_IN_MONTH = Lists.newArrayList(0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31); + private final static int[] DAYS_IN_MONTH = new int[] {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + private final static int ALLOW_SPACE_MASK = 4 | 64; + private final static int MAX_DATE_PARTS = 8; + private final static int YY_PART_YEAR = 70; static { try { @@ -1068,7 +1068,7 @@ private boolean checkRange() { || microsecond > MAX_MICROSECOND; } private boolean checkDate() { - if (month != 0 && day > DAYS_IN_MONTH.get((int)month)){ + if (month != 0 && day > DAYS_IN_MONTH[((int) month)]){ if (month == 2 && day == 29 && Year.isLeap(year)) { return false; } @@ -1137,8 +1137,8 @@ private void getDateFromDaynr(long daynr) throws InvalidFormatException { } } this.month = 1; - while (daysOfYear > DAYS_IN_MONTH.get((int) this.month)) { - daysOfYear -= DAYS_IN_MONTH.get((int) this.month); + while (daysOfYear > DAYS_IN_MONTH[(int) this.month]) { + daysOfYear -= DAYS_IN_MONTH[(int) this.month]; this.month++; } this.day = daysOfYear + leapDay; @@ -1172,4 +1172,122 @@ private int checkWord(Map dict, String value) throws InvalidFor } throw new InvalidFormatException("'" + value + "' is invalid"); } + + // Parse dateStr into this literal; throws AnalysisException if it is empty, malformed or out of range. The interval format is the one with no delimiters + // YYYY-MM-DD HH-MM-SS.FFFFFF AM in default format, and now doris will skip part 7 + // 0 1 2 3 4 5 6 7 + public void fromDateStr(String dateStr) throws AnalysisException { + dateStr = dateStr.trim(); + if (dateStr.isEmpty()) { + throw new AnalysisException("parse datetime value failed: " + dateStr); + } + int[] dateVal = new
int[MAX_DATE_PARTS]; + int[] dateLen = new int[MAX_DATE_PARTS]; + + // Fix year length + int pre = 0; + int pos = 0; + while (pos < dateStr.length() && (Character.isDigit(dateStr.charAt(pos)) || dateStr.charAt(pos) == 'T')) { + pos++; + } + int yearLen = 4; + int digits = pos - pre; + boolean isIntervalFormat = false; + // For YYYYMMDD/YYYYMMDDHHMMSS is 4 digits years + if (pos == dateStr.length() || dateStr.charAt(pos) == '.') { + if (digits == 4 || digits == 8 || digits >= 14) { + yearLen = 4; + } else { + yearLen = 2; + } + isIntervalFormat = true; + } + + int fieldIdx = 0; + int fieldLen = yearLen; + while (pre < dateStr.length() && Character.isDigit(dateStr.charAt(pre)) && fieldIdx < MAX_DATE_PARTS - 1) { + int start = pre; + int temp_val = 0; + boolean scanToDelim = (!isIntervalFormat) && (fieldIdx != 6); + while (pre < dateStr.length() && Character.isDigit(dateStr.charAt(pre)) && (scanToDelim || fieldLen-- != 0)) { + temp_val = temp_val * 10 + (dateStr.charAt(pre++) - '0'); + } + dateVal[fieldIdx] = temp_val; + dateLen[fieldIdx] = pre - start; + fieldLen = 2; + + if (pre == dateStr.length()) { + fieldIdx++; + break; + } + + if (fieldIdx == 2 && dateStr.charAt(pre) == 'T') { + // YYYYMMDDTHHMMSS, skip 'T' and continue + pre++; + fieldIdx++; + continue; + } + + // Second part + if (fieldIdx == 5) { + if (dateStr.charAt(pre) == '.') { + pre++; + fieldLen = 6; + } else if (Character.isDigit(dateStr.charAt(pre))) { + fieldIdx++; + break; + } + fieldIdx++; + continue; + } + // skip separators; the bounds check has to guard both the punctuation and the space test + while (pre < dateStr.length() && (Character.toString(dateStr.charAt(pre)).matches("\\p{Punct}") + || Character.isSpaceChar(dateStr.charAt(pre)))) { + if (Character.isSpaceChar(dateStr.charAt(pre))) { + if (((1 << fieldIdx) & ALLOW_SPACE_MASK) == 0) { + throw new AnalysisException("parse datetime value failed: " + dateStr); + } + } + pre++; + } + fieldIdx++; + } + int numField = fieldIdx; + if (!isIntervalFormat) { + yearLen = dateLen[0]; + } + for (; fieldIdx <
MAX_DATE_PARTS; ++fieldIdx) { + dateLen[fieldIdx] = 0; + dateVal[fieldIdx] = 0; + } + if (yearLen == 2) { + if (dateVal[0] < YY_PART_YEAR) { + dateVal[0] += 2000; + } else { + dateVal[0] += 1900; + } + } + + if (numField < 3) { + throw new AnalysisException("parse datetime value failed: " + dateStr); + } + + year = dateVal[0]; + month = dateVal[1]; + day = dateVal[2]; + hour = dateVal[3]; + minute = dateVal[4]; + second = dateVal[5]; + microsecond = dateVal[6]; + + if (numField == 3) { + type = Type.DATE; + } else { + type = Type.DATETIME; + } + + if (checkRange() || checkDate()) { + throw new AnalysisException("Datetime value is out of range: " + dateStr); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteDateLiteralRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteDateLiteralRule.java index 9b08224c21e3e8..2448903fc6d4ae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteDateLiteralRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteDateLiteralRule.java @@ -37,10 +37,6 @@ */ public class RewriteDateLiteralRule implements ExprRewriteRule { public final static ExprRewriteRule INSTANCE = new RewriteDateLiteralRule(); - private final static int ALLOW_SPACE_MASK = 4 | 64; - private final static int MAX_DATE_PARTS = 8; - private final static int YY_PART_YEAR = 70; - private final static int[] DAYS_IN_MONTH = new int[] {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; @Override public Expr apply(Expr expr, Analyzer analyzer, ExprRewriter.ClauseType clauseType) throws AnalysisException { @@ -62,7 +58,9 @@ public Expr apply(Expr expr, Analyzer analyzer, ExprRewriter.ClauseType clauseTy if (childExpr instanceof LiteralExpr) { try { String dateStr = childExpr.getStringValue(); - expr.setChild(1, getDateLiteralfromDateStr(dateStr)); + DateLiteral dateLiteral = new DateLiteral(); + dateLiteral.fromDateStr(dateStr); + expr.setChild(1, dateLiteral); } catch (AnalysisException e) { if
(clauseType == ExprRewriter.ClauseType.OTHER_CLAUSE) { return new NullLiteral(); @@ -74,125 +72,4 @@ public Expr apply(Expr expr, Analyzer analyzer, ExprRewriter.ClauseType clauseTy } return expr; } - - // The interval format is that with no delimiters - // YYYY-MM-DD HH-MM-DD.FFFFFF AM in default format, and now doris will skip part 6, 7 - // 0 1 2 3 4 5 6 7 - private DateLiteral getDateLiteralfromDateStr(String dateStr) throws AnalysisException { - dateStr = dateStr.trim(); - if (dateStr.isEmpty()) { - throw new AnalysisException("parse datetime value failed: " + dateStr); - } - int[] dateVal = new int[MAX_DATE_PARTS]; - int[] dateLen = new int[MAX_DATE_PARTS]; - - // Fix year length - int pre = 0; - int pos = 0; - while (pos < dateStr.length() && (Character.isDigit(dateStr.charAt(pos)) || dateStr.charAt(pos) == 'T')) { - pos++; - } - int yearLen = 4; - int digits = pos - pre; - boolean isIntervalFormat = false; - // For YYYYMMDD/YYYYMMDDHHMMSS is 4 digits years - if (pos == dateStr.length() || dateStr.charAt(pos) == '.') { - if (digits == 4 || digits == 8 || digits >= 14) { - yearLen = 4; - } else { - yearLen = 2; - } - isIntervalFormat = true; - } - int fieldIdx = 0; - int fieldLen = yearLen; - while (pre < dateStr.length() && Character.isDigit(dateStr.charAt(pre)) && fieldIdx < MAX_DATE_PARTS - 1) { - int start = pre; - int temp_val = 0; - boolean scanToDelim = (!isIntervalFormat) && (fieldIdx != 6); - while (pre < dateStr.length() && Character.isDigit(dateStr.charAt(pre)) && (scanToDelim || fieldLen-- != 0)) { - temp_val = temp_val * 10 + (dateStr.charAt(pre++) - '0'); - } - dateVal[fieldIdx] = temp_val; - dateLen[fieldIdx] = pre - start; - fieldLen = 2; - - if (pre == dateStr.length()) { - fieldIdx++; - break; - } - - if (fieldIdx == 2 && dateStr.charAt(pre) == 'T') { - // YYYYMMDDTHHMMDD, skip 'T' and continue - pre++; - fieldIdx++; - continue; - } - - // Second part - if (fieldIdx == 5) { - if (dateStr.charAt(pre) == '.') { - pre++; - fieldLen = 6; - } 
else if (Character.isDigit(dateStr.charAt(pre))) { - fieldIdx++; - break; - } - fieldIdx++; - continue; - } - // escape separator - while (pre < dateStr.length() && (Character.toString(dateStr.charAt(pre)).matches("\\p{Punct}")) - || Character.isSpaceChar(dateStr.charAt(pre))) { - if (Character.isSpaceChar(dateStr.charAt(pre))) { - if (((1 << fieldIdx) & ALLOW_SPACE_MASK) == 0) { - throw new AnalysisException("parse datetime value failed: " + dateStr); - } - } - pre++; - } - fieldIdx++; - } - int numField = fieldIdx; - if (!isIntervalFormat) { - yearLen = dateLen[0]; - } - for (; fieldIdx < MAX_DATE_PARTS; ++fieldIdx) { - dateLen[fieldIdx] = 0; - dateVal[fieldIdx] = 0; - } - if (yearLen == 2) { - if (dateVal[0] < YY_PART_YEAR) { - dateVal[0] += 2000; - } else { - dateVal[0] += 1900; - } - } - - if (numField < 3) { - throw new AnalysisException("parse datetime value failed: " + dateStr); - } - return getValidDataLiteral(dateVal[0], dateVal[1], dateVal[2], dateVal[3], dateVal[4], dateVal[5]); - } - - private boolean isLeapYear(int year) { - return ((year % 4) == 0) && ((year % 100 != 0) || ((year % 400) == 0)); - } - - private Boolean CheckDateOutOfRange(int year, int month, int day) { - if (month != 0 && month <= 12 && day > DAYS_IN_MONTH[month]) { - // Feb 29 in leap year is valid. - if (!(month == 2 && day == 29 && isLeapYear(year))) return true; - } - return year > 9999 || month > 12 || day > 31; - } - - private DateLiteral getValidDataLiteral(int year, int month, int day, int hour, int minute, int second) throws AnalysisException { - boolean timeOutOfRange = hour > 23 || minute > 59 || second > 59; - if (timeOutOfRange || CheckDateOutOfRange(year, month, day)) { - throw new AnalysisException("Datetime value is out of range: " + - String.format("%s-%s-%s %s:%s:%s", year, month, day, hour, minute, second)); - } - return new DateLiteral(year, month, day, hour, minute, second); - } }