From 5ef331efaadc9b9beac86563cb91f813e63cbf21 Mon Sep 17 00:00:00 2001 From: Projjal Chanda Date: Tue, 19 Nov 2019 15:23:58 +0530 Subject: [PATCH 1/4] implemented regexp_matches function --- cpp/src/gandiva/CMakeLists.txt | 5 +- cpp/src/gandiva/expr_decomposer.cc | 4 +- cpp/src/gandiva/function_holder_registry.h | 7 +- cpp/src/gandiva/function_registry_string.cc | 4 + cpp/src/gandiva/gdv_function_stubs.cc | 18 +- cpp/src/gandiva/gdv_function_stubs.h | 3 + cpp/src/gandiva/like_holder.cc | 62 +----- cpp/src/gandiva/like_holder.h | 23 +-- cpp/src/gandiva/regexp_matches_holder.cc | 71 +++++++ cpp/src/gandiva/regexp_matches_holder.h | 54 +++++ cpp/src/gandiva/regexp_matches_holder_test.cc | 192 ++++++++++++++++++ cpp/src/gandiva/sql_like_holder.cc | 76 +++++++ cpp/src/gandiva/sql_like_holder.h | 56 +++++ ...holder_test.cc => sql_like_holder_test.cc} | 56 ++--- 14 files changed, 524 insertions(+), 107 deletions(-) create mode 100644 cpp/src/gandiva/regexp_matches_holder.cc create mode 100644 cpp/src/gandiva/regexp_matches_holder.h create mode 100644 cpp/src/gandiva/regexp_matches_holder_test.cc create mode 100644 cpp/src/gandiva/sql_like_holder.cc create mode 100644 cpp/src/gandiva/sql_like_holder.h rename cpp/src/gandiva/{like_holder_test.cc => sql_like_holder_test.cc} (66%) diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 60bf63a914a..e88a00f6ee1 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -71,8 +71,10 @@ set(SRC_FILES like_holder.cc literal_holder.cc projector.cc + regexp_matches_holder.cc regex_util.cc selection_vector.cc + sql_like_holder.cc tree_expr_builder.cc to_date_holder.cc random_generator_holder.cc @@ -204,7 +206,8 @@ add_gandiva_test(internals-test lru_cache_test.cc to_date_holder_test.cc simple_arena_test.cc - like_holder_test.cc + sql_like_holder_test.cc + regexp_matches_holder_test.cc decimal_type_util_test.cc random_generator_holder_test.cc) diff --git 
a/cpp/src/gandiva/expr_decomposer.cc b/cpp/src/gandiva/expr_decomposer.cc index 0902468e31e..a56b27053ba 100644 --- a/cpp/src/gandiva/expr_decomposer.cc +++ b/cpp/src/gandiva/expr_decomposer.cc @@ -53,7 +53,9 @@ Status ExprDecomposer::Visit(const FieldNode& node) { // time. const FunctionNode ExprDecomposer::TryOptimize(const FunctionNode& node) { if (node.descriptor()->name() == "like") { - return LikeHolder::TryOptimize(node); + return SQLLikeHolder::TryOptimize(node); + } else if (node.descriptor()->name() == "regexp_matches" || node.descriptor()->name() == "regexp_like"){ + return RegexpMatchesHolder::TryOptimize(node); } else { return node; } diff --git a/cpp/src/gandiva/function_holder_registry.h b/cpp/src/gandiva/function_holder_registry.h index a2baa024b99..6f8a6d57772 100644 --- a/cpp/src/gandiva/function_holder_registry.h +++ b/cpp/src/gandiva/function_holder_registry.h @@ -26,7 +26,8 @@ #include "arrow/status.h" #include "gandiva/function_holder.h" -#include "gandiva/like_holder.h" +#include "gandiva/sql_like_holder.h" +#include "gandiva/regexp_matches_holder.h" #include "gandiva/node.h" #include "gandiva/random_generator_holder.h" #include "gandiva/to_date_holder.h" @@ -62,7 +63,9 @@ class FunctionHolderRegistry { private: static map_type& makers() { static map_type maker_map = { - {"like", LAMBDA_MAKER(LikeHolder)}, + {"like", LAMBDA_MAKER(SQLLikeHolder)}, + {"regexp_matches", LAMBDA_MAKER(RegexpMatchesHolder)}, + {"regexp_like", LAMBDA_MAKER(RegexpMatchesHolder)}, {"to_date", LAMBDA_MAKER(ToDateHolder)}, {"random", LAMBDA_MAKER(RandomGeneratorHolder)}, {"rand", LAMBDA_MAKER(RandomGeneratorHolder)}, diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index 3a7066d87e0..fa693e28e0b 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -76,6 +76,10 @@ std::vector GetStringFunctionRegistry() { kResultNullIfNull, "gdv_fn_like_utf8_utf8", 
NativeFunction::kNeedsFunctionHolder), + NativeFunction("regexp_matches", {"regexp_like"}, DataTypeVector{utf8(), utf8()}, boolean(), + kResultNullIfNull, "gdv_fn_regexp_matches_utf8_utf8", + NativeFunction::kNeedsFunctionHolder), + NativeFunction("substr", {"substring"}, DataTypeVector{utf8(), int64() /*offset*/, int64() /*length*/}, utf8(), kResultNullIfNull, "substr_utf8_int64_int64", diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index da7a03b312c..bbe36aabe93 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -23,7 +23,7 @@ #include "gandiva/engine.h" #include "gandiva/exported_funcs.h" #include "gandiva/in_holder.h" -#include "gandiva/like_holder.h" +#include "gandiva/sql_like_holder.h" #include "gandiva/random_generator_holder.h" #include "gandiva/to_date_holder.h" @@ -37,6 +37,11 @@ bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len, return (*holder)(std::string(data, data_len)); } +bool gdv_fn_regexp_matches_utf8_utf8(int64_t ptr, const char* data, int data_len, +const char* pattern, int pattern_len) { + return gdv_fn_like_utf8_utf8(ptr, data, data_len, pattern, pattern_len); +} + double gdv_fn_random(int64_t ptr) { gandiva::RandomGeneratorHolder* holder = reinterpret_cast(ptr); @@ -187,6 +192,17 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const { types->i1_type() /*return_type*/, args, reinterpret_cast(gdv_fn_like_utf8_utf8)); + // gdv_fn_regexp_matches_utf8_utf8 + args = {types->i64_type(), // int64_t ptr + types->i8_ptr_type(), // const char* data + types->i32_type(), // int data_len + types->i8_ptr_type(), // const char* pattern + types->i32_type()}; // int pattern_len + + engine->AddGlobalMappingForFunc("gdv_fn_regexp_matches_utf8_utf8", + types->i1_type() /*return_type*/, args, + reinterpret_cast(gdv_fn_regexp_matches_utf8_utf8)); + // gdv_fn_to_date_utf8_utf8_int32 args = {types->i64_type(), // int64_t execution_context 
types->i64_type(), // int64_t holder_ptr diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h index fcdf7d6ac66..f8bdcdc38c9 100644 --- a/cpp/src/gandiva/gdv_function_stubs.h +++ b/cpp/src/gandiva/gdv_function_stubs.h @@ -26,6 +26,9 @@ extern "C" { bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len, const char* pattern, int pattern_len); +bool gdv_fn_regexp_matches_utf8_utf8(int64_t ptr, const char* data, int data_len, + const char* pattern, int pattern_len); + int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context, int64_t ptr, const char* data, int data_len, bool in1_validity, const char* pattern, int pattern_len, diff --git a/cpp/src/gandiva/like_holder.cc b/cpp/src/gandiva/like_holder.cc index 404105b1070..71f67376d89 100644 --- a/cpp/src/gandiva/like_holder.cc +++ b/cpp/src/gandiva/like_holder.cc @@ -17,76 +17,30 @@ #include "gandiva/like_holder.h" -#include #include "gandiva/node.h" -#include "gandiva/regex_util.h" namespace gandiva { -RE2 LikeHolder::starts_with_regex_(R"((\w|\s)*\.\*)"); -RE2 LikeHolder::ends_with_regex_(R"(\.\*(\w|\s)*)"); - -// Short-circuit pattern matches for the two common sub cases : -// - starts_with and ends_with. 
-const FunctionNode LikeHolder::TryOptimize(const FunctionNode& node) { - std::shared_ptr holder; - auto status = Make(node, &holder); - if (status.ok()) { - std::string& pattern = holder->pattern_; - auto literal_type = node.children().at(1)->return_type(); - - if (RE2::FullMatch(pattern, starts_with_regex_)) { - auto prefix = pattern.substr(0, pattern.length() - 2); // trim .* - auto prefix_node = - std::make_shared(literal_type, LiteralHolder(prefix), false); - return FunctionNode("starts_with", {node.children().at(0), prefix_node}, - node.return_type()); - } else if (RE2::FullMatch(pattern, ends_with_regex_)) { - auto suffix = pattern.substr(2); // skip .* - auto suffix_node = - std::make_shared(literal_type, LiteralHolder(suffix), false); - return FunctionNode("ends_with", {node.children().at(0), suffix_node}, - node.return_type()); - } - } - - // Could not optimize, return original node. - return node; -} - static bool IsArrowStringLiteral(arrow::Type::type type) { return type == arrow::Type::STRING || type == arrow::Type::BINARY; } -Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr* holder) { +Status LikeHolder::Make(const FunctionNode& node, std::string* pattern) { ARROW_RETURN_IF(node.children().size() != 2, - Status::Invalid("'like' function requires two parameters")); + Status::Invalid("'" + node.descriptor()->name() + "' function requires two parameters")); auto literal = dynamic_cast(node.children().at(1).get()); ARROW_RETURN_IF( - literal == nullptr, - Status::Invalid("'like' function requires a literal as the second parameter")); + literal == nullptr, + Status::Invalid("'" + node.descriptor()->name() + "' function requires a literal as the second parameter")); auto literal_type = literal->return_type()->id(); ARROW_RETURN_IF( - !IsArrowStringLiteral(literal_type), - Status::Invalid( - "'like' function requires a string literal as the second parameter")); + !IsArrowStringLiteral(literal_type), + Status::Invalid( + "'" + 
node.descriptor()->name() + " function requires a string literal as the second parameter")); - return Make(arrow::util::get(literal->holder()), holder); -} - -Status LikeHolder::Make(const std::string& sql_pattern, - std::shared_ptr* holder) { - std::string pcre_pattern; - ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern)); - - auto lholder = std::shared_ptr(new LikeHolder(pcre_pattern)); - ARROW_RETURN_IF(!lholder->regex_.ok(), - Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed")); - - *holder = lholder; + *pattern = arrow::util::get(literal->holder()); return Status::OK(); } - } // namespace gandiva diff --git a/cpp/src/gandiva/like_holder.h b/cpp/src/gandiva/like_holder.h index eab30bf732f..0d61742236b 100644 --- a/cpp/src/gandiva/like_holder.h +++ b/cpp/src/gandiva/like_holder.h @@ -31,29 +31,12 @@ namespace gandiva { -/// Function Holder for SQL 'like' +/// Base class for Function Holder for pattern matching SQL functions like 'like' and 'regexp_matches' class GANDIVA_EXPORT LikeHolder : public FunctionHolder { public: - ~LikeHolder() override = default; + static Status Make(const FunctionNode& node, std::string* pattern); - static Status Make(const FunctionNode& node, std::shared_ptr* holder); - - static Status Make(const std::string& sql_pattern, std::shared_ptr* holder); - - // Try and optimise a function node with a "like" pattern. - static const FunctionNode TryOptimize(const FunctionNode& node); - - /// Return true if the data matches the pattern. 
- bool operator()(const std::string& data) { return RE2::FullMatch(data, regex_); } - - private: - explicit LikeHolder(const std::string& pattern) : pattern_(pattern), regex_(pattern) {} - - std::string pattern_; // posix pattern string, to help debugging - RE2 regex_; // compiled regex for the pattern - - static RE2 starts_with_regex_; // pre-compiled pattern for matching starts_with - static RE2 ends_with_regex_; // pre-compiled pattern for matching ends_with + virtual bool operator()(const std::string& data) = 0; }; } // namespace gandiva diff --git a/cpp/src/gandiva/regexp_matches_holder.cc b/cpp/src/gandiva/regexp_matches_holder.cc new file mode 100644 index 00000000000..8979072a629 --- /dev/null +++ b/cpp/src/gandiva/regexp_matches_holder.cc @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gandiva/regexp_matches_holder.h" + +#include +#include "gandiva/node.h" +#include "gandiva/regex_util.h" + +namespace gandiva { + + RE2 RegexpMatchesHolder::starts_with_regex_(R"(\^([\w\s]+)(\.\*)?)"); + RE2 RegexpMatchesHolder::ends_with_regex_(R"((\.\*)?([\w\s]+)\$)"); + + // Short-circuit pattern matches for the two common sub cases : + // - starts_with and ends_with. 
+ const FunctionNode RegexpMatchesHolder::TryOptimize(const FunctionNode& node) { + std::shared_ptr holder; + auto status = Make(node, &holder); + if (status.ok()) { + std::string& pattern = holder->pattern_; + auto literal_type = node.children().at(1)->return_type(); + std::string substr; + if (RE2::FullMatch(pattern, starts_with_regex_, &substr)) { + auto prefix_node = + std::make_shared(literal_type, LiteralHolder(substr), false); + return FunctionNode("starts_with", {node.children().at(0), prefix_node}, + node.return_type()); + } else if (RE2::FullMatch(pattern, ends_with_regex_, (void *)NULL, &substr)) { + auto suffix_node = + std::make_shared(literal_type, LiteralHolder(substr), false); + return FunctionNode("ends_with", {node.children().at(0), suffix_node}, + node.return_type()); + } + } + + // Could not optimize, return original node. + return node; + } + + Status RegexpMatchesHolder::Make(const FunctionNode& node, std::shared_ptr* holder) { + std::string pcre_pattern; + ARROW_RETURN_NOT_OK(LikeHolder::Make(node, &pcre_pattern)); + return Make(pcre_pattern, holder); + } + + Status RegexpMatchesHolder::Make(const std::string& pcre_pattern, + std::shared_ptr* holder) { + auto lholder = std::shared_ptr(new RegexpMatchesHolder(pcre_pattern)); + ARROW_RETURN_IF(!lholder->regex_.ok(), + Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed")); + + *holder = lholder; + return Status::OK(); + } + +} // namespace gandiva diff --git a/cpp/src/gandiva/regexp_matches_holder.h b/cpp/src/gandiva/regexp_matches_holder.h new file mode 100644 index 00000000000..ebeba9b3336 --- /dev/null +++ b/cpp/src/gandiva/regexp_matches_holder.h @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef GANDIVA_REGEXP_MATCHES_HOLDER_H +#define GANDIVA_REGEXP_MATCHES_HOLDER_H + +#include + +#include + +#include "gandiva/like_holder.h" + +namespace gandiva { + +/// Function Holder for 'regexp_matches' and 'regexp_like' functions +class GANDIVA_EXPORT RegexpMatchesHolder : public LikeHolder { + public: + ~RegexpMatchesHolder() override = default; + + static Status Make(const FunctionNode& node, std::shared_ptr* holder); + + static Status Make(const std::string& pcre_pattern, std::shared_ptr* holder); + + // Try and optimise a function node with a "regexp_matches" pattern. + static const FunctionNode TryOptimize(const FunctionNode& node); + + /// Return true if there is a match in the data. 
+ bool operator()(const std::string& data) override {return RE2::PartialMatch(data, regex_);} + + private: + explicit RegexpMatchesHolder(const std::string& pattern) : pattern_(pattern), regex_(pattern) {} + + std::string pattern_; // posix pattern string, to help debugging + RE2 regex_; // compiled regex for the pattern + + static RE2 starts_with_regex_; // pre-compiled pattern for matching starts_with + static RE2 ends_with_regex_; // pre-compiled pattern for matching ends_with +}; +} +#endif //GANDIVA_REGEXP_MATCHES_HOLDER_H diff --git a/cpp/src/gandiva/regexp_matches_holder_test.cc b/cpp/src/gandiva/regexp_matches_holder_test.cc new file mode 100644 index 00000000000..867e483e14a --- /dev/null +++ b/cpp/src/gandiva/regexp_matches_holder_test.cc @@ -0,0 +1,192 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "gandiva/regexp_matches_holder.h" +#include "gandiva/regex_util.h" + +#include +#include + +#include + +namespace gandiva { + +class TestRegexpMatchesHolder : public ::testing::Test { +public: + FunctionNode BuildRegexpMatches(std::string pattern) { + auto field = std::make_shared(arrow::field("in", arrow::utf8())); + auto pattern_node = + std::make_shared(arrow::utf8(), LiteralHolder(pattern), false); + return FunctionNode("regexp_matches", {field, pattern_node}, arrow::boolean()); + } +}; + +TEST_F(TestRegexpMatchesHolder, TestString) { + std::shared_ptr regexp_matches_holder; + + auto status = RegexpMatchesHolder::Make("ab", &regexp_matches_holder); + EXPECT_EQ(status.ok(), true) << status.message(); + + auto& like = *regexp_matches_holder; + EXPECT_TRUE(like("ab")); + EXPECT_TRUE(like("abc")); + EXPECT_TRUE(like("abcd")); + EXPECT_TRUE(like("cab")); + + EXPECT_FALSE(like("a")); +} + +TEST_F(TestRegexpMatchesHolder, TestDotStar) { + std::shared_ptr regexp_matches_holder; + + auto status = RegexpMatchesHolder::Make("a.*b", &regexp_matches_holder); + EXPECT_EQ(status.ok(), true) << status.message(); + + auto& like = *regexp_matches_holder; + EXPECT_TRUE(like("ab")); + EXPECT_TRUE(like("adeb")); + EXPECT_TRUE(like("abc")); + EXPECT_TRUE(like("cabc")); + EXPECT_TRUE(like("caebf")); + + EXPECT_FALSE(like("ba")); + EXPECT_FALSE(like("a")); +} + +TEST_F(TestRegexpMatchesHolder, TestDot) { + std::shared_ptr regexp_matches_holder; + + auto status = RegexpMatchesHolder::Make("ab.", &regexp_matches_holder); + EXPECT_EQ(status.ok(), true) << status.message(); + + auto& like = *regexp_matches_holder; + EXPECT_TRUE(like("abc")); + EXPECT_TRUE(like("abd")); + EXPECT_TRUE(like("abcd")); + EXPECT_TRUE(like("dabc")); + + EXPECT_FALSE(like("a")); + EXPECT_FALSE(like("ab")); +} + +TEST_F(TestRegexpMatchesHolder, TestAnchors) { + std::shared_ptr regexp_matches_holder; + + auto status = RegexpMatchesHolder::Make("^ab.*c$", &regexp_matches_holder); + EXPECT_EQ(status.ok(), true) << 
status.message(); + + auto& like = *regexp_matches_holder; + EXPECT_TRUE(like("abdc")); + EXPECT_TRUE(like("abc")); + + EXPECT_FALSE(like("abcd")); + EXPECT_FALSE(like("dabc")); +} + +TEST_F(TestRegexpMatchesHolder, TestIgnoreCase) { + std::shared_ptr regexp_matches_holder; + + auto status = RegexpMatchesHolder::Make("(?i)ab", &regexp_matches_holder); + EXPECT_EQ(status.ok(), true) << status.message(); + + auto& like = *regexp_matches_holder; + EXPECT_TRUE(like("abc")); + EXPECT_TRUE(like("daBc")); + EXPECT_TRUE(like("CAB")); + + EXPECT_FALSE(like("ba")); +} + +TEST_F(TestRegexpMatchesHolder, TestCharacterClass) { + std::shared_ptr regexp_matches_holder; + + auto status = RegexpMatchesHolder::Make("[ab]c", &regexp_matches_holder); + EXPECT_EQ(status.ok(), true) << status.message(); + + auto& like = *regexp_matches_holder; + EXPECT_TRUE(like("acd")); + EXPECT_TRUE(like("ebc")); + EXPECT_TRUE(like("abc")); + + EXPECT_FALSE(like("ab")); +} + + +TEST_F(TestRegexpMatchesHolder, TestEscapeCharacter) { + std::shared_ptr regexp_matches_holder; + + auto status = RegexpMatchesHolder::Make("\\.\\*", &regexp_matches_holder); + EXPECT_EQ(status.ok(), true) << status.message(); + + auto& like = *regexp_matches_holder; + EXPECT_TRUE(like(".*")); + + EXPECT_FALSE(like("ab")); +} + +TEST_F(TestRegexpMatchesHolder, TestNonAsciiMatches) { + std::shared_ptr regexp_matches_holder; + + auto status = RegexpMatchesHolder::Make(".*çåå†.*", &regexp_matches_holder); + EXPECT_EQ(status.ok(), true) << status.message(); + + auto& like = *regexp_matches_holder; + EXPECT_TRUE(like("açåå†b")); + + EXPECT_FALSE(like("ab")); +} + +TEST_F(TestRegexpMatchesHolder, TestOptimise) { + // optimise for 'starts_with' + auto fnode = RegexpMatchesHolder::TryOptimize(BuildRegexpMatches("^abc")); + EXPECT_EQ(fnode.descriptor()->name(), "starts_with"); + EXPECT_EQ(fnode.ToString(), "bool starts_with((string) in, (const string) abc)"); + + fnode = RegexpMatchesHolder::TryOptimize(BuildRegexpMatches("^abc.*")); + 
EXPECT_EQ(fnode.descriptor()->name(), "starts_with"); + EXPECT_EQ(fnode.ToString(), "bool starts_with((string) in, (const string) abc)"); + + fnode = RegexpMatchesHolder::TryOptimize(BuildRegexpMatches("^ab cd")); + EXPECT_EQ(fnode.descriptor()->name(), "starts_with"); + EXPECT_EQ(fnode.ToString(), "bool starts_with((string) in, (const string) ab cd)"); + + // optimise for 'ends_with' + fnode = RegexpMatchesHolder::TryOptimize(BuildRegexpMatches("xyz$")); + EXPECT_EQ(fnode.descriptor()->name(), "ends_with"); + EXPECT_EQ(fnode.ToString(), "bool ends_with((string) in, (const string) xyz)"); + + fnode = RegexpMatchesHolder::TryOptimize(BuildRegexpMatches(".*xyz$")); + EXPECT_EQ(fnode.descriptor()->name(), "ends_with"); + EXPECT_EQ(fnode.ToString(), "bool ends_with((string) in, (const string) xyz)"); + + // no optimisation for others. + fnode = RegexpMatchesHolder::TryOptimize(BuildRegexpMatches("^xyz$")); + EXPECT_EQ(fnode.descriptor()->name(), "regexp_matches"); + + fnode = RegexpMatchesHolder::TryOptimize(BuildRegexpMatches("^xy.*z")); + EXPECT_EQ(fnode.descriptor()->name(), "regexp_matches"); + + fnode = RegexpMatchesHolder::TryOptimize(BuildRegexpMatches("^.*")); + EXPECT_EQ(fnode.descriptor()->name(), "regexp_matches"); + + fnode = RegexpMatchesHolder::TryOptimize(BuildRegexpMatches("x.yz$")); + EXPECT_EQ(fnode.descriptor()->name(), "regexp_matches"); + + fnode = RegexpMatchesHolder::TryOptimize(BuildRegexpMatches("^[xyz]")); + EXPECT_EQ(fnode.descriptor()->name(), "regexp_matches"); +} +} // namespace gandiva diff --git a/cpp/src/gandiva/sql_like_holder.cc b/cpp/src/gandiva/sql_like_holder.cc new file mode 100644 index 00000000000..08c8a600ca6 --- /dev/null +++ b/cpp/src/gandiva/sql_like_holder.cc @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gandiva/sql_like_holder.h" + +#include +#include "gandiva/node.h" +#include "gandiva/regex_util.h" + +namespace gandiva { + + RE2 SQLLikeHolder::starts_with_regex_(R"((\w|\s)*\.\*)"); + RE2 SQLLikeHolder::ends_with_regex_(R"(\.\*(\w|\s)*)"); + +// Short-circuit pattern matches for the two common sub cases : +// - starts_with and ends_with. + const FunctionNode SQLLikeHolder::TryOptimize(const FunctionNode& node) { + std::shared_ptr holder; + auto status = Make(node, &holder); + if (status.ok()) { + std::string& pattern = holder->pattern_; + auto literal_type = node.children().at(1)->return_type(); + + if (RE2::FullMatch(pattern, starts_with_regex_)) { + auto prefix = pattern.substr(0, pattern.length() - 2); // trim .* + auto prefix_node = + std::make_shared(literal_type, LiteralHolder(prefix), false); + return FunctionNode("starts_with", {node.children().at(0), prefix_node}, + node.return_type()); + } else if (RE2::FullMatch(pattern, ends_with_regex_)) { + auto suffix = pattern.substr(2); // skip .* + auto suffix_node = + std::make_shared(literal_type, LiteralHolder(suffix), false); + return FunctionNode("ends_with", {node.children().at(0), suffix_node}, + node.return_type()); + } + } + + // Could not optimize, return original node. 
+ return node; + } + + Status SQLLikeHolder::Make(const FunctionNode& node, std::shared_ptr* holder) { + std::string sql_pattern; + ARROW_RETURN_NOT_OK(LikeHolder::Make(node, &sql_pattern)); + return Make(sql_pattern, holder); + } + + Status SQLLikeHolder::Make(const std::string& sql_pattern, + std::shared_ptr* holder) { + std::string pcre_pattern; + ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern)); + + auto lholder = std::shared_ptr(new SQLLikeHolder(pcre_pattern)); + ARROW_RETURN_IF(!lholder->regex_.ok(), + Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed")); + + *holder = lholder; + return Status::OK(); + } + +} // namespace gandiva diff --git a/cpp/src/gandiva/sql_like_holder.h b/cpp/src/gandiva/sql_like_holder.h new file mode 100644 index 00000000000..beaea1af017 --- /dev/null +++ b/cpp/src/gandiva/sql_like_holder.h @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef GANDIVA_SQL_LIKE_HOLDER_H +#define GANDIVA_SQL_LIKE_HOLDER_H + +#include + +#include + +#include "gandiva/like_holder.h" + +namespace gandiva { + +/// Function Holder for SQL 'like' +class GANDIVA_EXPORT SQLLikeHolder : public LikeHolder { + public: + ~SQLLikeHolder() override = default; + + static Status Make(const FunctionNode& node, std::shared_ptr* holder); + + static Status Make(const std::string& sql_pattern, std::shared_ptr* holder); + + // Try and optimise a function node with a "like" pattern. + static const FunctionNode TryOptimize(const FunctionNode& node); + + /// Return true if the data matches the pattern. + bool operator()(const std::string& data) override {return RE2::FullMatch(data, regex_);} + +private: + explicit SQLLikeHolder(const std::string& pattern) : pattern_(pattern), regex_(pattern) {} + + std::string pattern_; // posix pattern string, to help debugging + RE2 regex_; // compiled regex for the pattern + + static RE2 starts_with_regex_; // pre-compiled pattern for matching starts_with + static RE2 ends_with_regex_; // pre-compiled pattern for matching ends_with +}; + +} // namespace gandiva + +#endif // GANDIVA_SQL_LIKE_HOLDER_H diff --git a/cpp/src/gandiva/like_holder_test.cc b/cpp/src/gandiva/sql_like_holder_test.cc similarity index 66% rename from cpp/src/gandiva/like_holder_test.cc rename to cpp/src/gandiva/sql_like_holder_test.cc index 817473d7bb2..60d2ddb0f7a 100644 --- a/cpp/src/gandiva/like_holder_test.cc +++ b/cpp/src/gandiva/sql_like_holder_test.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "gandiva/like_holder.h" +#include "gandiva/sql_like_holder.h" #include "gandiva/regex_util.h" #include @@ -25,7 +25,7 @@ namespace gandiva { -class TestLikeHolder : public ::testing::Test { +class TestSQLLikeHolder : public ::testing::Test { public: FunctionNode BuildLike(std::string pattern) { auto field = std::make_shared(arrow::field("in", arrow::utf8())); @@ -35,13 +35,13 @@ class TestLikeHolder : public ::testing::Test { } }; -TEST_F(TestLikeHolder, TestMatchAny) { - std::shared_ptr like_holder; +TEST_F(TestSQLLikeHolder, TestMatchAny) { + std::shared_ptr sql_like_holder; - auto status = LikeHolder::Make("ab%", &like_holder); + auto status = SQLLikeHolder::Make("ab%", &sql_like_holder); EXPECT_EQ(status.ok(), true) << status.message(); - auto& like = *like_holder; + auto& like = *sql_like_holder; EXPECT_TRUE(like("ab")); EXPECT_TRUE(like("abc")); EXPECT_TRUE(like("abcd")); @@ -50,13 +50,13 @@ TEST_F(TestLikeHolder, TestMatchAny) { EXPECT_FALSE(like("cab")); } -TEST_F(TestLikeHolder, TestMatchOne) { - std::shared_ptr like_holder; +TEST_F(TestSQLLikeHolder, TestMatchOne) { + std::shared_ptr sql_like_holder; - auto status = LikeHolder::Make("ab_", &like_holder); + auto status = SQLLikeHolder::Make("ab_", &sql_like_holder); EXPECT_EQ(status.ok(), true) << status.message(); - auto& like = *like_holder; + auto& like = *sql_like_holder; EXPECT_TRUE(like("abc")); EXPECT_TRUE(like("abd")); @@ -65,18 +65,18 @@ TEST_F(TestLikeHolder, TestMatchOne) { EXPECT_FALSE(like("dabc")); } -TEST_F(TestLikeHolder, TestPcreSpecial) { - std::shared_ptr like_holder; +TEST_F(TestSQLLikeHolder, TestPcreSpecial) { + std::shared_ptr sql_like_holder; - auto status = LikeHolder::Make(".*ab_", &like_holder); + auto status = SQLLikeHolder::Make(".*ab_", &sql_like_holder); EXPECT_EQ(status.ok(), true) << status.message(); - auto& like = *like_holder; + auto& like = *sql_like_holder; EXPECT_TRUE(like(".*abc")); // . 
and * aren't special in sql regex EXPECT_FALSE(like("xxabc")); } -TEST_F(TestLikeHolder, TestRegexEscape) { +TEST_F(TestSQLLikeHolder, TestRegexEscape) { std::string res; auto status = RegexUtil::SqlLikePatternToPcre("#%hello#_abc_def##", '#', res); EXPECT_TRUE(status.ok()) << status.message(); @@ -84,44 +84,44 @@ TEST_F(TestLikeHolder, TestRegexEscape) { EXPECT_EQ(res, "%hello_abc.def#"); } -TEST_F(TestLikeHolder, TestDot) { - std::shared_ptr like_holder; +TEST_F(TestSQLLikeHolder, TestDot) { + std::shared_ptr sql_like_holder; - auto status = LikeHolder::Make("abc.", &like_holder); + auto status = SQLLikeHolder::Make("abc.", &sql_like_holder); EXPECT_EQ(status.ok(), true) << status.message(); - auto& like = *like_holder; + auto& like = *sql_like_holder; EXPECT_FALSE(like("abcd")); } -TEST_F(TestLikeHolder, TestOptimise) { +TEST_F(TestSQLLikeHolder, TestOptimise) { // optimise for 'starts_with' - auto fnode = LikeHolder::TryOptimize(BuildLike("xy 123z%")); + auto fnode = SQLLikeHolder::TryOptimize(BuildLike("xy 123z%")); EXPECT_EQ(fnode.descriptor()->name(), "starts_with"); EXPECT_EQ(fnode.ToString(), "bool starts_with((string) in, (const string) xy 123z)"); // optimise for 'ends_with' - fnode = LikeHolder::TryOptimize(BuildLike("%xyz")); + fnode = SQLLikeHolder::TryOptimize(BuildLike("%xyz")); EXPECT_EQ(fnode.descriptor()->name(), "ends_with"); EXPECT_EQ(fnode.ToString(), "bool ends_with((string) in, (const string) xyz)"); // no optimisation for others. 
- fnode = LikeHolder::TryOptimize(BuildLike("xyz_")); + fnode = SQLLikeHolder::TryOptimize(BuildLike("xyz_")); EXPECT_EQ(fnode.descriptor()->name(), "like"); - fnode = LikeHolder::TryOptimize(BuildLike("_xyz")); + fnode = SQLLikeHolder::TryOptimize(BuildLike("_xyz")); EXPECT_EQ(fnode.descriptor()->name(), "like"); - fnode = LikeHolder::TryOptimize(BuildLike("%xyz%")); + fnode = SQLLikeHolder::TryOptimize(BuildLike("%xyz%")); EXPECT_EQ(fnode.descriptor()->name(), "like"); - fnode = LikeHolder::TryOptimize(BuildLike("_xyz_")); + fnode = SQLLikeHolder::TryOptimize(BuildLike("_xyz_")); EXPECT_EQ(fnode.descriptor()->name(), "like"); - fnode = LikeHolder::TryOptimize(BuildLike("%xyz_")); + fnode = SQLLikeHolder::TryOptimize(BuildLike("%xyz_")); EXPECT_EQ(fnode.descriptor()->name(), "like"); - fnode = LikeHolder::TryOptimize(BuildLike("x_yz%")); + fnode = SQLLikeHolder::TryOptimize(BuildLike("x_yz%")); EXPECT_EQ(fnode.descriptor()->name(), "like"); } From 5bf04d6052ea5d96efa7d285f2879a07e5afbf1e Mon Sep 17 00:00:00 2001 From: Projjal Chanda Date: Tue, 19 Nov 2019 16:14:59 +0530 Subject: [PATCH 2/4] fixed linting issues --- cpp/src/gandiva/expr_decomposer.cc | 3 ++- cpp/src/gandiva/function_registry_string.cc | 3 ++- cpp/src/gandiva/gdv_function_stubs.cc | 7 ++++--- cpp/src/gandiva/like_holder.cc | 9 ++++++--- cpp/src/gandiva/like_holder.h | 3 ++- cpp/src/gandiva/regexp_matches_holder.cc | 6 ++++-- cpp/src/gandiva/regexp_matches_holder.h | 17 ++++++++++++----- cpp/src/gandiva/regexp_matches_holder_test.cc | 2 +- cpp/src/gandiva/sql_like_holder.cc | 3 ++- cpp/src/gandiva/sql_like_holder.h | 13 +++++++++---- 10 files changed, 44 insertions(+), 22 deletions(-) diff --git a/cpp/src/gandiva/expr_decomposer.cc b/cpp/src/gandiva/expr_decomposer.cc index a56b27053ba..4842c3a8190 100644 --- a/cpp/src/gandiva/expr_decomposer.cc +++ b/cpp/src/gandiva/expr_decomposer.cc @@ -54,7 +54,8 @@ Status ExprDecomposer::Visit(const FieldNode& node) { const FunctionNode 
ExprDecomposer::TryOptimize(const FunctionNode& node) { if (node.descriptor()->name() == "like") { return SQLLikeHolder::TryOptimize(node); - } else if (node.descriptor()->name() == "regexp_matches" || node.descriptor()->name() == "regexp_like"){ + } else if (node.descriptor()->name() == "regexp_matches" + || node.descriptor()->name() == "regexp_like") { return RegexpMatchesHolder::TryOptimize(node); } else { return node; diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index fa693e28e0b..bb810e030b1 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -76,7 +76,8 @@ std::vector GetStringFunctionRegistry() { kResultNullIfNull, "gdv_fn_like_utf8_utf8", NativeFunction::kNeedsFunctionHolder), - NativeFunction("regexp_matches", {"regexp_like"}, DataTypeVector{utf8(), utf8()}, boolean(), + NativeFunction("regexp_matches", {"regexp_like"}, + DataTypeVector{utf8(), utf8()}, boolean(), kResultNullIfNull, "gdv_fn_regexp_matches_utf8_utf8", NativeFunction::kNeedsFunctionHolder), diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index bbe36aabe93..8d3b3fa16a8 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -199,9 +199,10 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const { types->i8_ptr_type(), // const char* pattern types->i32_type()}; // int pattern_len - engine->AddGlobalMappingForFunc("gdv_fn_regexp_matches_utf8_utf8", - types->i1_type() /*return_type*/, args, - reinterpret_cast(gdv_fn_regexp_matches_utf8_utf8)); + engine->AddGlobalMappingForFunc( + "gdv_fn_regexp_matches_utf8_utf8", + types->i1_type() /*return_type*/, args, + reinterpret_cast(gdv_fn_regexp_matches_utf8_utf8)); // gdv_fn_to_date_utf8_utf8_int32 args = {types->i64_type(), // int64_t execution_context diff --git a/cpp/src/gandiva/like_holder.cc b/cpp/src/gandiva/like_holder.cc index 
71f67376d89..79bcb56e2bd 100644 --- a/cpp/src/gandiva/like_holder.cc +++ b/cpp/src/gandiva/like_holder.cc @@ -27,18 +27,21 @@ static bool IsArrowStringLiteral(arrow::Type::type type) { Status LikeHolder::Make(const FunctionNode& node, std::string* pattern) { ARROW_RETURN_IF(node.children().size() != 2, - Status::Invalid("'" + node.descriptor()->name() + "' function requires two parameters")); + Status::Invalid("'" + node.descriptor()->name() + + "' function requires two parameters")); auto literal = dynamic_cast(node.children().at(1).get()); ARROW_RETURN_IF( literal == nullptr, - Status::Invalid("'" + node.descriptor()->name() + "' function requires a literal as the second parameter")); + Status::Invalid("'" + node.descriptor()->name() + + "' function requires a literal as the second parameter")); auto literal_type = literal->return_type()->id(); ARROW_RETURN_IF( !IsArrowStringLiteral(literal_type), Status::Invalid( - "'" + node.descriptor()->name() + " function requires a string literal as the second parameter")); + "'" + node.descriptor()->name() + + " function requires a string literal as the second parameter")); *pattern = arrow::util::get(literal->holder()); return Status::OK(); diff --git a/cpp/src/gandiva/like_holder.h b/cpp/src/gandiva/like_holder.h index 0d61742236b..866d74f4e85 100644 --- a/cpp/src/gandiva/like_holder.h +++ b/cpp/src/gandiva/like_holder.h @@ -31,7 +31,8 @@ namespace gandiva { -/// Base class for Function Holder for pattern matching SQL functions like 'like' and 'regexp_matches' +/// Base class for Function Holder for pattern matching SQL functions like +/// 'like' and 'regexp_matches' class GANDIVA_EXPORT LikeHolder : public FunctionHolder { public: static Status Make(const FunctionNode& node, std::string* pattern); diff --git a/cpp/src/gandiva/regexp_matches_holder.cc b/cpp/src/gandiva/regexp_matches_holder.cc index 8979072a629..93db837db08 100644 --- a/cpp/src/gandiva/regexp_matches_holder.cc +++ 
b/cpp/src/gandiva/regexp_matches_holder.cc @@ -52,7 +52,8 @@ namespace gandiva { return node; } - Status RegexpMatchesHolder::Make(const FunctionNode& node, std::shared_ptr* holder) { + Status RegexpMatchesHolder::Make(const FunctionNode& node, + std::shared_ptr* holder) { std::string pcre_pattern; ARROW_RETURN_NOT_OK(LikeHolder::Make(node, &pcre_pattern)); return Make(pcre_pattern, holder); @@ -60,7 +61,8 @@ namespace gandiva { Status RegexpMatchesHolder::Make(const std::string& pcre_pattern, std::shared_ptr* holder) { - auto lholder = std::shared_ptr(new RegexpMatchesHolder(pcre_pattern)); + auto lholder = std::shared_ptr( + new RegexpMatchesHolder(pcre_pattern)); ARROW_RETURN_IF(!lholder->regex_.ok(), Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed")); diff --git a/cpp/src/gandiva/regexp_matches_holder.h b/cpp/src/gandiva/regexp_matches_holder.h index ebeba9b3336..803ba3ceabd 100644 --- a/cpp/src/gandiva/regexp_matches_holder.h +++ b/cpp/src/gandiva/regexp_matches_holder.h @@ -19,6 +19,7 @@ #define GANDIVA_REGEXP_MATCHES_HOLDER_H #include +#include #include @@ -31,18 +32,23 @@ class GANDIVA_EXPORT RegexpMatchesHolder : public LikeHolder { public: ~RegexpMatchesHolder() override = default; - static Status Make(const FunctionNode& node, std::shared_ptr* holder); + static Status Make(const FunctionNode& node, + std::shared_ptr* holder); - static Status Make(const std::string& pcre_pattern, std::shared_ptr* holder); + static Status Make(const std::string& pcre_pattern, + std::shared_ptr* holder); // Try and optimise a function node with a "regexp_matches" pattern. static const FunctionNode TryOptimize(const FunctionNode& node); /// Return true if there is a match in the data. 
- bool operator()(const std::string& data) override {return RE2::PartialMatch(data, regex_);} + bool operator()(const std::string& data) override { + return RE2::PartialMatch(data, regex_); + } private: - explicit RegexpMatchesHolder(const std::string& pattern) : pattern_(pattern), regex_(pattern) {} + explicit RegexpMatchesHolder(const std::string& pattern) : + pattern_(pattern), regex_(pattern) {} std::string pattern_; // posix pattern string, to help debugging RE2 regex_; // compiled regex for the pattern @@ -50,5 +56,6 @@ class GANDIVA_EXPORT RegexpMatchesHolder : public LikeHolder { static RE2 starts_with_regex_; // pre-compiled pattern for matching starts_with static RE2 ends_with_regex_; // pre-compiled pattern for matching ends_with }; -} +} // namespace gandiva + #endif //GANDIVA_REGEXP_MATCHES_HOLDER_H diff --git a/cpp/src/gandiva/regexp_matches_holder_test.cc b/cpp/src/gandiva/regexp_matches_holder_test.cc index 867e483e14a..24392819b63 100644 --- a/cpp/src/gandiva/regexp_matches_holder_test.cc +++ b/cpp/src/gandiva/regexp_matches_holder_test.cc @@ -26,7 +26,7 @@ namespace gandiva { class TestRegexpMatchesHolder : public ::testing::Test { -public: + public: FunctionNode BuildRegexpMatches(std::string pattern) { auto field = std::make_shared(arrow::field("in", arrow::utf8())); auto pattern_node = diff --git a/cpp/src/gandiva/sql_like_holder.cc b/cpp/src/gandiva/sql_like_holder.cc index 08c8a600ca6..fd8986f47a6 100644 --- a/cpp/src/gandiva/sql_like_holder.cc +++ b/cpp/src/gandiva/sql_like_holder.cc @@ -54,7 +54,8 @@ namespace gandiva { return node; } - Status SQLLikeHolder::Make(const FunctionNode& node, std::shared_ptr* holder) { + Status SQLLikeHolder::Make(const FunctionNode& node, + std::shared_ptr* holder) { std::string sql_pattern; ARROW_RETURN_NOT_OK(LikeHolder::Make(node, &sql_pattern)); return Make(sql_pattern, holder); diff --git a/cpp/src/gandiva/sql_like_holder.h b/cpp/src/gandiva/sql_like_holder.h index beaea1af017..e8d19289b09 100644 --- 
a/cpp/src/gandiva/sql_like_holder.h +++ b/cpp/src/gandiva/sql_like_holder.h @@ -19,6 +19,7 @@ #define GANDIVA_SQL_LIKE_HOLDER_H #include +#include #include @@ -33,16 +34,20 @@ class GANDIVA_EXPORT SQLLikeHolder : public LikeHolder { static Status Make(const FunctionNode& node, std::shared_ptr* holder); - static Status Make(const std::string& sql_pattern, std::shared_ptr* holder); + static Status Make(const std::string& sql_pattern, + std::shared_ptr* holder); // Try and optimise a function node with a "like" pattern. static const FunctionNode TryOptimize(const FunctionNode& node); /// Return true if the data matches the pattern. - bool operator()(const std::string& data) override {return RE2::FullMatch(data, regex_);} + bool operator()(const std::string& data) override { + return RE2::FullMatch(data, regex_); + } -private: - explicit SQLLikeHolder(const std::string& pattern) : pattern_(pattern), regex_(pattern) {} + private: + explicit SQLLikeHolder(const std::string& pattern) : + pattern_(pattern), regex_(pattern) {} std::string pattern_; // posix pattern string, to help debugging RE2 regex_; // compiled regex for the pattern From 50579a6d5fb531183bf1cbc58f44110d66ff4211 Mon Sep 17 00:00:00 2001 From: Projjal Chanda Date: Wed, 20 Nov 2019 12:09:00 +0530 Subject: [PATCH 3/4] fixed check style issues --- cpp/src/gandiva/expr_decomposer.cc | 4 +- cpp/src/gandiva/function_holder_registry.h | 4 +- cpp/src/gandiva/function_registry_string.cc | 5 +- cpp/src/gandiva/gdv_function_stubs.cc | 9 +-- cpp/src/gandiva/like_holder.cc | 15 ++-- cpp/src/gandiva/regexp_matches_holder.cc | 76 +++++++++---------- cpp/src/gandiva/regexp_matches_holder.h | 10 +-- cpp/src/gandiva/regexp_matches_holder_test.cc | 3 +- cpp/src/gandiva/sql_like_holder.cc | 76 +++++++++---------- cpp/src/gandiva/sql_like_holder.h | 6 +- 10 files changed, 102 insertions(+), 106 deletions(-) diff --git a/cpp/src/gandiva/expr_decomposer.cc b/cpp/src/gandiva/expr_decomposer.cc index 4842c3a8190..521446175ef 
100644 --- a/cpp/src/gandiva/expr_decomposer.cc +++ b/cpp/src/gandiva/expr_decomposer.cc @@ -54,8 +54,8 @@ Status ExprDecomposer::Visit(const FieldNode& node) { const FunctionNode ExprDecomposer::TryOptimize(const FunctionNode& node) { if (node.descriptor()->name() == "like") { return SQLLikeHolder::TryOptimize(node); - } else if (node.descriptor()->name() == "regexp_matches" - || node.descriptor()->name() == "regexp_like") { + } else if (node.descriptor()->name() == "regexp_matches" || + node.descriptor()->name() == "regexp_like") { return RegexpMatchesHolder::TryOptimize(node); } else { return node; diff --git a/cpp/src/gandiva/function_holder_registry.h b/cpp/src/gandiva/function_holder_registry.h index 6f8a6d57772..6a489c3502c 100644 --- a/cpp/src/gandiva/function_holder_registry.h +++ b/cpp/src/gandiva/function_holder_registry.h @@ -26,10 +26,10 @@ #include "arrow/status.h" #include "gandiva/function_holder.h" -#include "gandiva/sql_like_holder.h" -#include "gandiva/regexp_matches_holder.h" #include "gandiva/node.h" #include "gandiva/random_generator_holder.h" +#include "gandiva/regexp_matches_holder.h" +#include "gandiva/sql_like_holder.h" #include "gandiva/to_date_holder.h" namespace gandiva { diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index bb810e030b1..a725df6b5bf 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -76,9 +76,8 @@ std::vector GetStringFunctionRegistry() { kResultNullIfNull, "gdv_fn_like_utf8_utf8", NativeFunction::kNeedsFunctionHolder), - NativeFunction("regexp_matches", {"regexp_like"}, - DataTypeVector{utf8(), utf8()}, boolean(), - kResultNullIfNull, "gdv_fn_regexp_matches_utf8_utf8", + NativeFunction("regexp_matches", {"regexp_like"}, DataTypeVector{utf8(), utf8()}, + boolean(), kResultNullIfNull, "gdv_fn_regexp_matches_utf8_utf8", NativeFunction::kNeedsFunctionHolder), NativeFunction("substr", {"substring"}, diff --git 
a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 8d3b3fa16a8..d0466ed1caa 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -23,8 +23,8 @@ #include "gandiva/engine.h" #include "gandiva/exported_funcs.h" #include "gandiva/in_holder.h" -#include "gandiva/sql_like_holder.h" #include "gandiva/random_generator_holder.h" +#include "gandiva/sql_like_holder.h" #include "gandiva/to_date_holder.h" /// Stub functions that can be accessed from LLVM or the pre-compiled library. @@ -38,7 +38,7 @@ bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len, } bool gdv_fn_regexp_matches_utf8_utf8(int64_t ptr, const char* data, int data_len, -const char* pattern, int pattern_len) { + const char* pattern, int pattern_len) { return gdv_fn_like_utf8_utf8(ptr, data, data_len, pattern, pattern_len); } @@ -200,9 +200,8 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const { types->i32_type()}; // int pattern_len engine->AddGlobalMappingForFunc( - "gdv_fn_regexp_matches_utf8_utf8", - types->i1_type() /*return_type*/, args, - reinterpret_cast(gdv_fn_regexp_matches_utf8_utf8)); + "gdv_fn_regexp_matches_utf8_utf8", types->i1_type() /*return_type*/, args, + reinterpret_cast(gdv_fn_regexp_matches_utf8_utf8)); // gdv_fn_to_date_utf8_utf8_int32 args = {types->i64_type(), // int64_t execution_context diff --git a/cpp/src/gandiva/like_holder.cc b/cpp/src/gandiva/like_holder.cc index 79bcb56e2bd..b25358baa5f 100644 --- a/cpp/src/gandiva/like_holder.cc +++ b/cpp/src/gandiva/like_holder.cc @@ -28,20 +28,19 @@ static bool IsArrowStringLiteral(arrow::Type::type type) { Status LikeHolder::Make(const FunctionNode& node, std::string* pattern) { ARROW_RETURN_IF(node.children().size() != 2, Status::Invalid("'" + node.descriptor()->name() + - "' function requires two parameters")); + "' function requires two parameters")); auto literal = dynamic_cast(node.children().at(1).get()); ARROW_RETURN_IF( - 
literal == nullptr, - Status::Invalid("'" + node.descriptor()->name() + - "' function requires a literal as the second parameter")); + literal == nullptr, + Status::Invalid("'" + node.descriptor()->name() + + "' function requires a literal as the second parameter")); auto literal_type = literal->return_type()->id(); ARROW_RETURN_IF( - !IsArrowStringLiteral(literal_type), - Status::Invalid( - "'" + node.descriptor()->name() + - " function requires a string literal as the second parameter")); + !IsArrowStringLiteral(literal_type), + Status::Invalid("'" + node.descriptor()->name() + + " function requires a string literal as the second parameter")); *pattern = arrow::util::get(literal->holder()); return Status::OK(); diff --git a/cpp/src/gandiva/regexp_matches_holder.cc b/cpp/src/gandiva/regexp_matches_holder.cc index 93db837db08..6b10678573d 100644 --- a/cpp/src/gandiva/regexp_matches_holder.cc +++ b/cpp/src/gandiva/regexp_matches_holder.cc @@ -23,51 +23,51 @@ namespace gandiva { - RE2 RegexpMatchesHolder::starts_with_regex_(R"(\^([\w\s]+)(\.\*)?)"); - RE2 RegexpMatchesHolder::ends_with_regex_(R"((\.\*)?([\w\s]+)\$)"); +RE2 RegexpMatchesHolder::starts_with_regex_(R"(\^([\w\s]+)(\.\*)?)"); +RE2 RegexpMatchesHolder::ends_with_regex_(R"((\.\*)?([\w\s]+)\$)"); - // Short-circuit pattern matches for the two common sub cases : - // - starts_with and ends_with. - const FunctionNode RegexpMatchesHolder::TryOptimize(const FunctionNode& node) { - std::shared_ptr holder; - auto status = Make(node, &holder); - if (status.ok()) { - std::string& pattern = holder->pattern_; - auto literal_type = node.children().at(1)->return_type(); - std::string substr; - if (RE2::FullMatch(pattern, starts_with_regex_, &substr)) { - auto prefix_node = +// Short-circuit pattern matches for the two common sub cases : +// - starts_with and ends_with. 
+const FunctionNode RegexpMatchesHolder::TryOptimize(const FunctionNode& node) { + std::shared_ptr holder; + auto status = Make(node, &holder); + if (status.ok()) { + std::string& pattern = holder->pattern_; + auto literal_type = node.children().at(1)->return_type(); + std::string substr; + if (RE2::FullMatch(pattern, starts_with_regex_, &substr)) { + auto prefix_node = std::make_shared(literal_type, LiteralHolder(substr), false); - return FunctionNode("starts_with", {node.children().at(0), prefix_node}, - node.return_type()); - } else if (RE2::FullMatch(pattern, ends_with_regex_, (void *)NULL, &substr)) { - auto suffix_node = + return FunctionNode("starts_with", {node.children().at(0), prefix_node}, + node.return_type()); + } else if (RE2::FullMatch(pattern, ends_with_regex_, (void*)NULL, &substr)) { + auto suffix_node = std::make_shared(literal_type, LiteralHolder(substr), false); - return FunctionNode("ends_with", {node.children().at(0), suffix_node}, - node.return_type()); - } + return FunctionNode("ends_with", {node.children().at(0), suffix_node}, + node.return_type()); } - - // Could not optimize, return original node. - return node; } - Status RegexpMatchesHolder::Make(const FunctionNode& node, - std::shared_ptr* holder) { - std::string pcre_pattern; - ARROW_RETURN_NOT_OK(LikeHolder::Make(node, &pcre_pattern)); - return Make(pcre_pattern, holder); - } + // Could not optimize, return original node. 
+ return node; +} - Status RegexpMatchesHolder::Make(const std::string& pcre_pattern, - std::shared_ptr* holder) { - auto lholder = std::shared_ptr( - new RegexpMatchesHolder(pcre_pattern)); - ARROW_RETURN_IF(!lholder->regex_.ok(), - Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed")); +Status RegexpMatchesHolder::Make(const FunctionNode& node, + std::shared_ptr* holder) { + std::string pcre_pattern; + ARROW_RETURN_NOT_OK(LikeHolder::Make(node, &pcre_pattern)); + return Make(pcre_pattern, holder); +} - *holder = lholder; - return Status::OK(); - } +Status RegexpMatchesHolder::Make(const std::string& pcre_pattern, + std::shared_ptr* holder) { + auto lholder = + std::shared_ptr(new RegexpMatchesHolder(pcre_pattern)); + ARROW_RETURN_IF(!lholder->regex_.ok(), + Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed")); + + *holder = lholder; + return Status::OK(); +} } // namespace gandiva diff --git a/cpp/src/gandiva/regexp_matches_holder.h b/cpp/src/gandiva/regexp_matches_holder.h index 803ba3ceabd..d2ca892a177 100644 --- a/cpp/src/gandiva/regexp_matches_holder.h +++ b/cpp/src/gandiva/regexp_matches_holder.h @@ -18,8 +18,8 @@ #ifndef GANDIVA_REGEXP_MATCHES_HOLDER_H #define GANDIVA_REGEXP_MATCHES_HOLDER_H -#include #include +#include #include @@ -47,8 +47,8 @@ class GANDIVA_EXPORT RegexpMatchesHolder : public LikeHolder { } private: - explicit RegexpMatchesHolder(const std::string& pattern) : - pattern_(pattern), regex_(pattern) {} + explicit RegexpMatchesHolder(const std::string& pattern) + : pattern_(pattern), regex_(pattern) {} std::string pattern_; // posix pattern string, to help debugging RE2 regex_; // compiled regex for the pattern @@ -56,6 +56,6 @@ class GANDIVA_EXPORT RegexpMatchesHolder : public LikeHolder { static RE2 starts_with_regex_; // pre-compiled pattern for matching starts_with static RE2 ends_with_regex_; // pre-compiled pattern for matching ends_with }; -} // namespace gandiva +} // namespace gandiva -#endif 
//GANDIVA_REGEXP_MATCHES_HOLDER_H +#endif // GANDIVA_REGEXP_MATCHES_HOLDER_H diff --git a/cpp/src/gandiva/regexp_matches_holder_test.cc b/cpp/src/gandiva/regexp_matches_holder_test.cc index 24392819b63..6a4d1f2c7c0 100644 --- a/cpp/src/gandiva/regexp_matches_holder_test.cc +++ b/cpp/src/gandiva/regexp_matches_holder_test.cc @@ -30,7 +30,7 @@ class TestRegexpMatchesHolder : public ::testing::Test { FunctionNode BuildRegexpMatches(std::string pattern) { auto field = std::make_shared(arrow::field("in", arrow::utf8())); auto pattern_node = - std::make_shared(arrow::utf8(), LiteralHolder(pattern), false); + std::make_shared(arrow::utf8(), LiteralHolder(pattern), false); return FunctionNode("regexp_matches", {field, pattern_node}, arrow::boolean()); } }; @@ -125,7 +125,6 @@ TEST_F(TestRegexpMatchesHolder, TestCharacterClass) { EXPECT_FALSE(like("ab")); } - TEST_F(TestRegexpMatchesHolder, TestEscapeCharacter) { std::shared_ptr regexp_matches_holder; diff --git a/cpp/src/gandiva/sql_like_holder.cc b/cpp/src/gandiva/sql_like_holder.cc index fd8986f47a6..c0d8aa26825 100644 --- a/cpp/src/gandiva/sql_like_holder.cc +++ b/cpp/src/gandiva/sql_like_holder.cc @@ -23,55 +23,55 @@ namespace gandiva { - RE2 SQLLikeHolder::starts_with_regex_(R"((\w|\s)*\.\*)"); - RE2 SQLLikeHolder::ends_with_regex_(R"(\.\*(\w|\s)*)"); +RE2 SQLLikeHolder::starts_with_regex_(R"((\w|\s)*\.\*)"); +RE2 SQLLikeHolder::ends_with_regex_(R"(\.\*(\w|\s)*)"); // Short-circuit pattern matches for the two common sub cases : // - starts_with and ends_with. 
- const FunctionNode SQLLikeHolder::TryOptimize(const FunctionNode& node) { - std::shared_ptr holder; - auto status = Make(node, &holder); - if (status.ok()) { - std::string& pattern = holder->pattern_; - auto literal_type = node.children().at(1)->return_type(); +const FunctionNode SQLLikeHolder::TryOptimize(const FunctionNode& node) { + std::shared_ptr holder; + auto status = Make(node, &holder); + if (status.ok()) { + std::string& pattern = holder->pattern_; + auto literal_type = node.children().at(1)->return_type(); - if (RE2::FullMatch(pattern, starts_with_regex_)) { - auto prefix = pattern.substr(0, pattern.length() - 2); // trim .* - auto prefix_node = + if (RE2::FullMatch(pattern, starts_with_regex_)) { + auto prefix = pattern.substr(0, pattern.length() - 2); // trim .* + auto prefix_node = std::make_shared(literal_type, LiteralHolder(prefix), false); - return FunctionNode("starts_with", {node.children().at(0), prefix_node}, - node.return_type()); - } else if (RE2::FullMatch(pattern, ends_with_regex_)) { - auto suffix = pattern.substr(2); // skip .* - auto suffix_node = + return FunctionNode("starts_with", {node.children().at(0), prefix_node}, + node.return_type()); + } else if (RE2::FullMatch(pattern, ends_with_regex_)) { + auto suffix = pattern.substr(2); // skip .* + auto suffix_node = std::make_shared(literal_type, LiteralHolder(suffix), false); - return FunctionNode("ends_with", {node.children().at(0), suffix_node}, - node.return_type()); - } + return FunctionNode("ends_with", {node.children().at(0), suffix_node}, + node.return_type()); } - - // Could not optimize, return original node. - return node; } - Status SQLLikeHolder::Make(const FunctionNode& node, - std::shared_ptr* holder) { - std::string sql_pattern; - ARROW_RETURN_NOT_OK(LikeHolder::Make(node, &sql_pattern)); - return Make(sql_pattern, holder); - } + // Could not optimize, return original node. 
+ return node; +} - Status SQLLikeHolder::Make(const std::string& sql_pattern, - std::shared_ptr* holder) { - std::string pcre_pattern; - ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern)); +Status SQLLikeHolder::Make(const FunctionNode& node, + std::shared_ptr* holder) { + std::string sql_pattern; + ARROW_RETURN_NOT_OK(LikeHolder::Make(node, &sql_pattern)); + return Make(sql_pattern, holder); +} - auto lholder = std::shared_ptr(new SQLLikeHolder(pcre_pattern)); - ARROW_RETURN_IF(!lholder->regex_.ok(), - Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed")); +Status SQLLikeHolder::Make(const std::string& sql_pattern, + std::shared_ptr* holder) { + std::string pcre_pattern; + ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern)); - *holder = lholder; - return Status::OK(); - } + auto lholder = std::shared_ptr(new SQLLikeHolder(pcre_pattern)); + ARROW_RETURN_IF(!lholder->regex_.ok(), + Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed")); + + *holder = lholder; + return Status::OK(); +} } // namespace gandiva diff --git a/cpp/src/gandiva/sql_like_holder.h b/cpp/src/gandiva/sql_like_holder.h index e8d19289b09..44e9750a80a 100644 --- a/cpp/src/gandiva/sql_like_holder.h +++ b/cpp/src/gandiva/sql_like_holder.h @@ -18,8 +18,8 @@ #ifndef GANDIVA_SQL_LIKE_HOLDER_H #define GANDIVA_SQL_LIKE_HOLDER_H -#include #include +#include #include @@ -46,8 +46,8 @@ class GANDIVA_EXPORT SQLLikeHolder : public LikeHolder { } private: - explicit SQLLikeHolder(const std::string& pattern) : - pattern_(pattern), regex_(pattern) {} + explicit SQLLikeHolder(const std::string& pattern) + : pattern_(pattern), regex_(pattern) {} std::string pattern_; // posix pattern string, to help debugging RE2 regex_; // compiled regex for the pattern From f647446ceddd267916fef7168f6a1c07f370e63d Mon Sep 17 00:00:00 2001 From: Projjal Chanda Date: Wed, 20 Nov 2019 14:49:55 +0530 Subject: [PATCH 4/4] Fixed some names --- 
cpp/src/gandiva/regexp_matches_holder_test.cc | 84 +++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/cpp/src/gandiva/regexp_matches_holder_test.cc b/cpp/src/gandiva/regexp_matches_holder_test.cc index 6a4d1f2c7c0..d2ece61a754 100644 --- a/cpp/src/gandiva/regexp_matches_holder_test.cc +++ b/cpp/src/gandiva/regexp_matches_holder_test.cc @@ -41,13 +41,13 @@ TEST_F(TestRegexpMatchesHolder, TestString) { auto status = RegexpMatchesHolder::Make("ab", &regexp_matches_holder); EXPECT_EQ(status.ok(), true) << status.message(); - auto& like = *regexp_matches_holder; - EXPECT_TRUE(like("ab")); - EXPECT_TRUE(like("abc")); - EXPECT_TRUE(like("abcd")); - EXPECT_TRUE(like("cab")); + auto& regexp_matches = *regexp_matches_holder; + EXPECT_TRUE(regexp_matches("ab")); + EXPECT_TRUE(regexp_matches("abc")); + EXPECT_TRUE(regexp_matches("abcd")); + EXPECT_TRUE(regexp_matches("cab")); - EXPECT_FALSE(like("a")); + EXPECT_FALSE(regexp_matches("a")); } TEST_F(TestRegexpMatchesHolder, TestDotStar) { @@ -56,15 +56,15 @@ TEST_F(TestRegexpMatchesHolder, TestDotStar) { auto status = RegexpMatchesHolder::Make("a.*b", &regexp_matches_holder); EXPECT_EQ(status.ok(), true) << status.message(); - auto& like = *regexp_matches_holder; - EXPECT_TRUE(like("ab")); - EXPECT_TRUE(like("adeb")); - EXPECT_TRUE(like("abc")); - EXPECT_TRUE(like("cabc")); - EXPECT_TRUE(like("caebf")); + auto& regexp_matches = *regexp_matches_holder; + EXPECT_TRUE(regexp_matches("ab")); + EXPECT_TRUE(regexp_matches("adeb")); + EXPECT_TRUE(regexp_matches("abc")); + EXPECT_TRUE(regexp_matches("cabc")); + EXPECT_TRUE(regexp_matches("caebf")); - EXPECT_FALSE(like("ba")); - EXPECT_FALSE(like("a")); + EXPECT_FALSE(regexp_matches("ba")); + EXPECT_FALSE(regexp_matches("a")); } TEST_F(TestRegexpMatchesHolder, TestDot) { @@ -73,14 +73,14 @@ TEST_F(TestRegexpMatchesHolder, TestDot) { auto status = RegexpMatchesHolder::Make("ab.", &regexp_matches_holder); EXPECT_EQ(status.ok(), true) << status.message(); - auto& like
= *regexp_matches_holder; - EXPECT_TRUE(like("abc")); - EXPECT_TRUE(like("abd")); - EXPECT_TRUE(like("abcd")); - EXPECT_TRUE(like("dabc")); + auto& regexp_matches = *regexp_matches_holder; + EXPECT_TRUE(regexp_matches("abc")); + EXPECT_TRUE(regexp_matches("abd")); + EXPECT_TRUE(regexp_matches("abcd")); + EXPECT_TRUE(regexp_matches("dabc")); - EXPECT_FALSE(like("a")); - EXPECT_FALSE(like("ab")); + EXPECT_FALSE(regexp_matches("a")); + EXPECT_FALSE(regexp_matches("ab")); } TEST_F(TestRegexpMatchesHolder, TestAnchors) { @@ -89,12 +89,12 @@ TEST_F(TestRegexpMatchesHolder, TestAnchors) { auto status = RegexpMatchesHolder::Make("^ab.*c$", &regexp_matches_holder); EXPECT_EQ(status.ok(), true) << status.message(); - auto& like = *regexp_matches_holder; - EXPECT_TRUE(like("abdc")); - EXPECT_TRUE(like("abc")); + auto& regexp_matches = *regexp_matches_holder; + EXPECT_TRUE(regexp_matches("abdc")); + EXPECT_TRUE(regexp_matches("abc")); - EXPECT_FALSE(like("abcd")); - EXPECT_FALSE(like("dabc")); + EXPECT_FALSE(regexp_matches("abcd")); + EXPECT_FALSE(regexp_matches("dabc")); } TEST_F(TestRegexpMatchesHolder, TestIgnoreCase) { @@ -103,12 +103,12 @@ TEST_F(TestRegexpMatchesHolder, TestIgnoreCase) { auto status = RegexpMatchesHolder::Make("(?i)ab", &regexp_matches_holder); EXPECT_EQ(status.ok(), true) << status.message(); - auto& like = *regexp_matches_holder; - EXPECT_TRUE(like("abc")); - EXPECT_TRUE(like("daBc")); - EXPECT_TRUE(like("CAB")); + auto& regexp_matches = *regexp_matches_holder; + EXPECT_TRUE(regexp_matches("abc")); + EXPECT_TRUE(regexp_matches("daBc")); + EXPECT_TRUE(regexp_matches("CAB")); - EXPECT_FALSE(like("ba")); + EXPECT_FALSE(regexp_matches("ba")); } TEST_F(TestRegexpMatchesHolder, TestCharacterClass) { @@ -117,12 +117,12 @@ TEST_F(TestRegexpMatchesHolder, TestCharacterClass) { auto status = RegexpMatchesHolder::Make("[ab]c", &regexp_matches_holder); EXPECT_EQ(status.ok(), true) << status.message(); - auto& like = *regexp_matches_holder; - EXPECT_TRUE(like("acd")); - 
EXPECT_TRUE(like("ebc")); - EXPECT_TRUE(like("abc")); + auto& regexp_matches = *regexp_matches_holder; + EXPECT_TRUE(regexp_matches("acd")); + EXPECT_TRUE(regexp_matches("ebc")); + EXPECT_TRUE(regexp_matches("abc")); - EXPECT_FALSE(like("ab")); + EXPECT_FALSE(regexp_matches("ab")); } TEST_F(TestRegexpMatchesHolder, TestEscapeCharacter) { @@ -131,10 +131,10 @@ TEST_F(TestRegexpMatchesHolder, TestEscapeCharacter) { auto status = RegexpMatchesHolder::Make("\\.\\*", &regexp_matches_holder); EXPECT_EQ(status.ok(), true) << status.message(); - auto& like = *regexp_matches_holder; - EXPECT_TRUE(like(".*")); + auto& regexp_matches = *regexp_matches_holder; + EXPECT_TRUE(regexp_matches(".*")); - EXPECT_FALSE(like("ab")); + EXPECT_FALSE(regexp_matches("ab")); } TEST_F(TestRegexpMatchesHolder, TestNonAsciiMatches) { @@ -143,10 +143,10 @@ TEST_F(TestRegexpMatchesHolder, TestNonAsciiMatches) { auto status = RegexpMatchesHolder::Make(".*çåå†.*", &regexp_matches_holder); EXPECT_EQ(status.ok(), true) << status.message(); - auto& like = *regexp_matches_holder; - EXPECT_TRUE(like("açåå†b")); + auto& regexp_matches = *regexp_matches_holder; + EXPECT_TRUE(regexp_matches("açåå†b")); - EXPECT_FALSE(like("ab")); + EXPECT_FALSE(regexp_matches("ab")); } TEST_F(TestRegexpMatchesHolder, TestOptimise) {