Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 21 additions & 28 deletions be/src/olap/delete_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
#include <gen_cpp/olap_file.pb.h>
#include <thrift/protocol/TDebugProtocol.h>

#include <boost/regex.hpp>
#include <sstream>
#include <string>
#include <vector>

Expand All @@ -40,12 +38,10 @@
using apache::thrift::ThriftDebugString;
using std::vector;
using std::string;
using std::stringstream;

using ::google::protobuf::RepeatedPtrField;

namespace doris {
using namespace ErrorCode;

// construct sub condition from TCondition
std::string construct_sub_predicate(const TCondition& condition) {
Expand Down Expand Up @@ -312,38 +308,35 @@ Status DeleteHandler::parse_condition(const DeleteSubPredicatePB& sub_cond, TCon
// value: matches "1597751948193618247 and length(source)<1;\n;\n"
//
// For more info, see DeleteHandler::construct_sub_predicates
// FIXME(gavin): support unicode. And this is a tricky implementation, it should
// not be the final resolution, refactor it.
// FIXME(gavin): This regex-based parsing is a hacky implementation and should not be the final solution; refactor it.
const char* const CONDITION_STR_PATTERN =
// .----------------- column-name ----------------. .----------------------- operator ------------------------. .------------ value ----------.
R"(([_a-zA-Z@0-9\s/][.a-zA-Z0-9_+-/?@#$%^&*"\s,:]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?: IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
// '----------------- group 1 --------------------' '--------------------- group 2 ---------------------------' | '-- group 4--' |
// match any of: = != >> << >= <= *= " IS " '----------- group 3 ---------'
// match **ANY THING** without(4)
// or with(3) single quote
boost::regex DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN);
// .----------------- column-name --------------------------. .----------------------- operator ------------------------. .------------ value ----------.
R"(([_a-zA-Z@0-9\s/\p{L}][.a-zA-Z0-9_+-/?@#$%^&*"\s,:\p{L}]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?: IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
// '----------------- group 1 ------------------------------' '--------------------- group 2 ---------------------------' | '-- group 4--' |
// match any of: = != >> << >= <= *= " IS " '----------- group 3 ---------'
// match **ANY THING** without(4)
// or with(3) single quote
// clang-format on
RE2 DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN);

Status DeleteHandler::parse_condition(const std::string& condition_str, TCondition* condition) {
bool matched = false;
boost::smatch what;
try {
VLOG_NOTICE << "condition_str: " << condition_str;
matched = boost::regex_match(condition_str, what, DELETE_HANDLER_REGEX) &&
condition_str.size() == what[0].str().size(); // exact match
} catch (boost::regex_error& e) {
VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; error=" << e.what()
<< "]";
}
std::string col_name, op, value, g4;

bool matched = RE2::FullMatch(condition_str, DELETE_HANDLER_REGEX, &col_name, &op, &value,
&g4); // exact match

if (!matched) {
return Status::Error<ErrorCode::INVALID_ARGUMENT>("fail to sub condition. condition={}",
condition_str);
return Status::InvalidArgument("fail to sub condition. condition={}", condition_str);
}

condition->column_name = what[1].str();
condition->condition_op = what[2].str() == " IS " ? "IS" : what[2].str();
condition->column_name = col_name;
condition->condition_op = op == " IS " ? "IS" : op;
// match string with single quotes, a = b or a = 'b'
condition->condition_values.push_back(what[3 + !!what[4].matched].str());
if (!g4.empty()) {
condition->condition_values.push_back(g4);
} else {
condition->condition_values.push_back(value);
}
VLOG_NOTICE << "parsed condition_str: col_name={" << condition->column_name << "} op={"
<< condition->condition_op << "} val={" << condition->condition_values.back()
<< "}";
Expand Down
4 changes: 4 additions & 0 deletions be/test/olap/delete_handler_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1225,6 +1225,10 @@ TEST_F(TestDeleteHandler, TestParseDeleteCondition) {
{R"(a IS b IS NOT NULL)", true, gen_cond(R"(a IS b)", "IS", R"(NOT NULL)" )}, // test " IS " in column name
{R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:=hell)", true, gen_cond(R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:)", "=", R"(hell)")}, // hellbound column name
{R"(this is a col very loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon colum name=long)", true, gen_cond(R"(this is a col very loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon colum name)", "=", R"(long)")}, // test " IS " in column name
{R"(中文列名1=b)" , true, gen_cond(R"(中文列名1)", "=" , R"(b)" )}, // Chinese case
{R"(错!!误!=b)" , false, gen_cond(R"(abc)" , "!=", R"(b)" )}, // illegal character
{R"(##错误<=b)" , false, gen_cond(R"(abc)" , "<=", R"(b)" )}, // illegal prefix
{R"(κάνεις지내세요>>b)" , true, gen_cond(R"(κάνεις지내세요)", ">>", R"(b)" )}, // other languages
};
for (auto& i : test_input) { test(i); }
}
Expand Down
6 changes: 6 additions & 0 deletions regression-test/data/delete_p0/test_delete_unicode.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql1 --
2020-12-12 1 1 1

-- !sql2 --

39 changes: 39 additions & 0 deletions regression-test/suites/delete_p0/test_delete_unicode.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression test: DELETE predicates must work on columns whose names contain
// non-ASCII (here Chinese) characters.
//
// Flow: create a table with unicode column names, insert one row, verify it is
// visible (sql1), delete it with a predicate on the unicode columns, then verify
// the table is empty (sql2).
suite("test_delete_unicode") {
    // Unicode identifiers are only accepted when this session variable is enabled.
    sql "set enable_unicode_name_support=true;"

    // Make the suite re-runnable: without this, a second run against the same
    // cluster fails at CREATE TABLE because the table already exists.
    sql "DROP TABLE IF EXISTS table_7298276;"

    sql """
        CREATE TABLE `table_7298276` (
            `中文列名1` date NOT NULL,
            `中文列名2` int NOT NULL,
            `中文列名3` bigint NOT NULL,
            `中文列名4` largeint NOT NULL,
            INDEX 中文列名2 (`中文列名2`) USING INVERTED,
            INDEX 中文列名4 (`中文列名4`) USING INVERTED
        ) ENGINE=OLAP
        DUPLICATE KEY(`中文列名1`, `中文列名2`, `中文列名3`)
        DISTRIBUTED BY HASH(`中文列名1`, `中文列名2`, `中文列名3`) BUCKETS 4
        properties("replication_num" = "1");
    """

    sql """ insert into table_7298276 values ('2020-12-12',1,1,1);"""

    // Expect exactly the inserted row.
    qt_sql1 "select * from table_7298276;"

    // The inserted row satisfies every conjunct, so it must be deleted.
    sql "delete from table_7298276 where 中文列名1 > '2012-08-17' and 中文列名2 > -68 and 中文列名3 in (1,2,3);"

    // Expect an empty result after the delete.
    qt_sql2 "select * from table_7298276;"
}