Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions be/src/runtime/datetime_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ bool DateTimeValue::from_date_str(const char* date_str, int len) {
int digits = pos - ptr;
bool is_interval_format = false;

// Compatible with MySQL. Shit!!!
// Compatible with MySQL.
// For YYYYMMDD/YYYYMMDDHHMMSS is 4 digits years
if (pos == end || *pos == '.') {
if (digits == 4 || digits == 8 || digits >= 14) {
Expand All @@ -122,7 +122,7 @@ bool DateTimeValue::from_date_str(const char* date_str, int len) {
while (ptr < end && isdigit(*ptr) && (scan_to_delim || field_len--)) {
temp_val = temp_val * 10 + (*ptr++ - '0');
}
// Imposible
// Impossible
if (temp_val > 999999L) {
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
import org.apache.doris.rewrite.RewriteEncryptKeyRule;
import org.apache.doris.rewrite.RewriteFromUnixTimeRule;
import org.apache.doris.rewrite.RewriteLikePredicateRule;
import org.apache.doris.rewrite.SimplifyInvalidDateBinaryPredicatesDateRule;
import org.apache.doris.rewrite.RewriteDateLiteralRule;
import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmap;
import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmapOrHLLRule;
import org.apache.doris.rewrite.mvrewrite.CountFieldToSum;
Expand Down Expand Up @@ -306,7 +306,7 @@ public GlobalState(Catalog catalog, ConnectContext context) {
rules.add(FoldConstantsRule.INSTANCE);
rules.add(RewriteFromUnixTimeRule.INSTANCE);
rules.add(CompoundPredicateWriteRule.INSTANCE);
rules.add(SimplifyInvalidDateBinaryPredicatesDateRule.INSTANCE);
rules.add(RewriteDateLiteralRule.INSTANCE);
rules.add(RewriteEncryptKeyRule.INSTANCE);
rules.add(RewriteAliasFunctionRule.INSTANCE);
rules.add(RewriteLikePredicateRule.INSTANCE);
Expand Down
132 changes: 125 additions & 7 deletions fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import org.apache.doris.thrift.TExprNodeType;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import org.apache.logging.log4j.LogManager;
Expand All @@ -41,7 +40,6 @@
import java.nio.ByteBuffer;
import java.time.Year;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.TimeZone;
Expand Down Expand Up @@ -89,8 +87,10 @@ public class DateLiteral extends LiteralExpr {
private static Map<String, Integer> MONTH_NAME_DICT = Maps.newHashMap();
private static Map<String, Integer> MONTH_ABBR_NAME_DICT = Maps.newHashMap();
private static Map<String, Integer> WEEK_DAY_NAME_DICT = Maps.newHashMap();
private static Map<String, Integer> WEEK_DAY_ABBR_NAME_DICT = Maps.newHashMap();
private static List<Integer> DAYS_IN_MONTH = Lists.newArrayList(0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
private final static int[] DAYS_IN_MONTH = new int[] {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
private final static int ALLOW_SPACE_MASK = 4 | 64;
private final static int MAX_DATE_PARTS = 8;
private final static int YY_PART_YEAR = 70;

static {
try {
Expand Down Expand Up @@ -1068,7 +1068,7 @@ private boolean checkRange() {
|| microsecond > MAX_MICROSECOND;
}
private boolean checkDate() {
if (month != 0 && day > DAYS_IN_MONTH.get((int)month)){
if (month != 0 && day > DAYS_IN_MONTH[((int) month)]){
if (month == 2 && day == 29 && Year.isLeap(year)) {
return false;
}
Expand Down Expand Up @@ -1137,8 +1137,8 @@ private void getDateFromDaynr(long daynr) throws InvalidFormatException {
}
}
this.month = 1;
while (daysOfYear > DAYS_IN_MONTH.get((int) this.month)) {
daysOfYear -= DAYS_IN_MONTH.get((int) this.month);
while (daysOfYear > DAYS_IN_MONTH[(int) this.month]) {
daysOfYear -= DAYS_IN_MONTH[(int) this.month];
this.month++;
}
this.day = daysOfYear + leapDay;
Expand Down Expand Up @@ -1172,4 +1172,122 @@ private int checkWord(Map<String, Integer> dict, String value) throws InvalidFor
}
throw new InvalidFormatException("'" + value + "' is invalid");
}

/**
 * Parses a MySQL-style date / datetime string and stores the result in this literal.
 *
 * <p>Two layouts are recognised:
 * <ul>
 *   <li>the "interval" format, a run of digits with no delimiters
 *       (e.g. {@code YYYYMMDD}, {@code YYYYMMDDHHMMSS}, optionally with a {@code 'T'}
 *       between the date and time parts and a {@code .FFFFFF} fraction);</li>
 *   <li>the delimited format:
 *       <pre>
 *       YYYY-MM-DD HH:MM:SS.FFFFFF AM
 *       0    1  2  3  4  5  6      7
 *       </pre>
 *       Part 7 (AM/PM) is not parsed.</li>
 * </ul>
 *
 * <p>A two-digit year below {@code YY_PART_YEAR} is mapped to 20YY, otherwise to 19YY.
 * When exactly three parts are found the literal type becomes {@code DATE}; with more
 * parts it becomes {@code DATETIME}.
 *
 * @param dateStr the text to parse; leading and trailing whitespace is trimmed
 * @throws AnalysisException if the text is empty, has fewer than three parts, contains a
 *         space where none is allowed, has a numeric part that is too large, or fails
 *         the range / calendar checks
 */
public void fromDateStr(String dateStr) throws AnalysisException {
    dateStr = dateStr.trim();
    if (dateStr.isEmpty()) {
        throw new AnalysisException("parse datetime value failed: " + dateStr);
    }
    final int len = dateStr.length();
    // No single part can legitimately exceed the 6-digit microsecond field; the same
    // bound is used by the BE parser. Checking it while accumulating also prevents
    // silent int overflow on long digit runs.
    final int maxPartValue = 999999;
    // Java zero-initialises both arrays, so parts that are never parsed stay 0.
    int[] dateVal = new int[MAX_DATE_PARTS];
    int[] dateLen = new int[MAX_DATE_PARTS];

    // Measure the leading run of digits (and 'T') to decide the year length.
    int pre = 0;
    int pos = 0;
    while (pos < len && (Character.isDigit(dateStr.charAt(pos)) || dateStr.charAt(pos) == 'T')) {
        pos++;
    }
    int yearLen = 4;
    int digits = pos - pre;
    boolean isIntervalFormat = false;
    // The whole value (up to an optional fraction) is one undelimited run:
    // YYYYMMDD / YYYYMMDDHHMMSS use 4-digit years, everything else 2-digit years.
    if (pos == len || dateStr.charAt(pos) == '.') {
        if (digits == 4 || digits == 8 || digits >= 14) {
            yearLen = 4;
        } else {
            yearLen = 2;
        }
        isIntervalFormat = true;
    }

    int fieldIdx = 0;
    int fieldLen = yearLen;
    while (pre < len && Character.isDigit(dateStr.charAt(pre)) && fieldIdx < MAX_DATE_PARTS - 1) {
        int start = pre;
        int partVal = 0;
        // Delimited parts are read up to the next non-digit; interval parts and the
        // fraction (part 6) are read with a fixed maximum width.
        boolean scanToDelim = (!isIntervalFormat) && (fieldIdx != 6);
        while (pre < len && Character.isDigit(dateStr.charAt(pre)) && (scanToDelim || fieldLen-- != 0)) {
            partVal = partVal * 10 + (dateStr.charAt(pre++) - '0');
            if (partVal > maxPartValue) {
                throw new AnalysisException("Datetime value is out of range: " + dateStr);
            }
        }
        dateVal[fieldIdx] = partVal;
        dateLen[fieldIdx] = pre - start;
        fieldLen = 2;

        if (pre == len) {
            fieldIdx++;
            break;
        }

        if (fieldIdx == 2 && dateStr.charAt(pre) == 'T') {
            // YYYYMMDDTHHMMSS, skip 'T' and continue
            pre++;
            fieldIdx++;
            continue;
        }

        // Second part: only a '.' may introduce the fraction.
        if (fieldIdx == 5) {
            if (dateStr.charAt(pre) == '.') {
                pre++;
                fieldLen = 6;
            } else if (Character.isDigit(dateStr.charAt(pre))) {
                fieldIdx++;
                break;
            }
            fieldIdx++;
            continue;
        }

        // Skip separators. The bounds check must guard BOTH the punctuation and the
        // space test, otherwise a trailing separator reads past the end of the string.
        while (pre < len) {
            char sep = dateStr.charAt(pre);
            boolean isSpace = Character.isSpaceChar(sep);
            if (!isSpace && !isDatePunctuation(sep)) {
                break;
            }
            // Spaces are only allowed after the parts selected by ALLOW_SPACE_MASK.
            if (isSpace && ((1 << fieldIdx) & ALLOW_SPACE_MASK) == 0) {
                throw new AnalysisException("parse datetime value failed: " + dateStr);
            }
            pre++;
        }
        fieldIdx++;
    }

    int numField = fieldIdx;
    if (!isIntervalFormat) {
        yearLen = dateLen[0];
    }
    if (yearLen == 2) {
        if (dateVal[0] < YY_PART_YEAR) {
            dateVal[0] += 2000;
        } else {
            dateVal[0] += 1900;
        }
    }

    if (numField < 3) {
        throw new AnalysisException("parse datetime value failed: " + dateStr);
    }

    year = dateVal[0];
    month = dateVal[1];
    day = dateVal[2];
    hour = dateVal[3];
    minute = dateVal[4];
    second = dateVal[5];
    // NOTE(review): the fraction is stored as parsed, so ".1" yields 1 rather than
    // 100000 microseconds; confirm whether it should be scaled by its digit count.
    microsecond = dateVal[6];

    if (numField == 3) {
        type = Type.DATE;
    } else {
        type = Type.DATETIME;
    }

    if (checkRange() || checkDate()) {
        throw new AnalysisException("Datetime value is out of range: " + dateStr);
    }
}

// Returns true for the ASCII punctuation characters matched by the regex class \p{Punct}.
private static boolean isDatePunctuation(char c) {
    return (c >= '!' && c <= '/')
            || (c >= ':' && c <= '@')
            || (c >= '[' && c <= '`')
            || (c >= '{' && c <= '~');
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public class ExprRewriter {
private final List<ExprRewriteRule> rules_;

// The type of clause that executes the rule.
// This type is only used in InferFiltersRule, other rules are not used
// This type is only used by InferFiltersRule and RewriteDateLiteralRule; other rules ignore it.
public enum ClauseType {
ON_CLAUSE,
WHERE_CLAUSE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,23 @@
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.BinaryPredicate;
import org.apache.doris.analysis.CastExpr;
import org.apache.doris.analysis.DateLiteral;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.analysis.NullLiteral;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.NotLiteralExprPredicate;

/**
* this rule try to convert date expression, if date is invalid, it will be
* converted into null literal to avoid to scan all partitions
* if a date data is invalid or contains nanosecond, it will be convert into CastExpr
* converted into null literal to avoid scanning all partitions
* if a date data is invalid, Doris will try to cast it as datetime firstly,
* only support rewriting pattern: slot + operator + date literal
* Examples:
* date = "2020-10-32" => NULL
* date = "2020-10-32" will throw analysis exception when in on clause or where clause,
* and be converted to be NULL when in other clause
*/
public class SimplifyInvalidDateBinaryPredicatesDateRule implements ExprRewriteRule {
public static ExprRewriteRule INSTANCE = new SimplifyInvalidDateBinaryPredicatesDateRule();
public static final int DATETIME_STRING_MAX_LENGTH = new String("yyyy-MM-dd HH:ii:ss").length();
private static final NotLiteralExprPredicate NOT_LITERAL_EXPR_PREDICATE = new NotLiteralExprPredicate();
public class RewriteDateLiteralRule implements ExprRewriteRule {
public final static ExprRewriteRule INSTANCE = new RewriteDateLiteralRule();

@Override
public Expr apply(Expr expr, Analyzer analyzer, ExprRewriter.ClauseType clauseType) throws AnalysisException {
Expand All @@ -52,28 +52,22 @@ public Expr apply(Expr expr, Analyzer analyzer, ExprRewriter.ClauseType clauseTy
if (!valueExpr.isConstant()) {
return expr;
}

// This is not a very good implementation and tricky.
// We have to handle the following cases:
// A. k1 is datetime, sql with "k1 > to_date(now())" will be converted to k1 > cast(to_date("xxxx-xx-xx"))
// B. k1 is datetime, sql with "k1 > '2021-10-32 10:00:00.100010'" will be converted to k1 > cast('2021-10-32 10:00:00.100010' as datetime)
// C. k1 is datetime, sql with "k1 > '2021-10-32'" will be converted to k1 > cast('2021-10-32' as datetime), and finally to converted to NullLiteral.
// Only consider CastExpr and try our best to convert non-date_literal to date_literal,to be compatible with MySQL
if (valueExpr instanceof CastExpr) {
valueExpr = valueExpr.getChild(0);
if (valueExpr.contains(NOT_LITERAL_EXPR_PREDICATE)) {
// Case A.
return expr;
}
String dateStr = valueExpr.toSql();
if (dateStr.length() > DATETIME_STRING_MAX_LENGTH && dateStr.contains(".")) {
// Case B
return expr;
}
// Case C
return new NullLiteral();
} else {
if (valueExpr.contains(NOT_LITERAL_EXPR_PREDICATE)) {
return expr;
Expr childExpr = valueExpr.getChild(0);
if (childExpr instanceof LiteralExpr) {
try {
String dateStr = childExpr.getStringValue();
DateLiteral dateLiteral = new DateLiteral();
dateLiteral.fromDateStr(dateStr);
expr.setChild(1, dateLiteral);
} catch (AnalysisException e) {
if (clauseType == ExprRewriter.ClauseType.OTHER_CLAUSE) {
return new NullLiteral();
} else {
throw new AnalysisException("Incorrect datetime value: " + valueExpr.toSql() + " in expression: " + expr.toSql());
}
}
}
}
return expr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@

package org.apache.doris.rewrite;

import org.apache.commons.lang3.StringUtils;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.FeConstants;

import org.apache.doris.qe.SessionVariable;
Expand All @@ -32,17 +30,12 @@
import org.junit.BeforeClass;
import org.junit.Test;

import java.util.Map;
import java.util.UUID;



public class InferFiltersRuleTest {

private static final Logger LOG = LogManager.getLogger(InferFiltersRuleTest.class);
private static String baseDir = "fe";
private static String runningDir = baseDir + "/mocked/InferFiltersRuleTest/"
+ UUID.randomUUID().toString() + "/";
+ UUID.randomUUID() + "/";
private static DorisAssert dorisAssert;
private static final String DB_NAME = "db1";
private static final String TABLE_NAME_1 = "tb1";
Expand Down
Loading