Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cpp/src/gandiva/function_holder_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ class FunctionHolderRegistry {
static map_type& makers() {
static map_type maker_map = {
{"like", LAMBDA_MAKER(LikeHolder)},
{"regexp_like", LAMBDA_MAKER(LikeHolder)},
{"regexp_matches", LAMBDA_MAKER(LikeHolder)},
{"to_date", LAMBDA_MAKER(ToDateHolder)},
{"random", LAMBDA_MAKER(RandomGeneratorHolder)},
{"rand", LAMBDA_MAKER(RandomGeneratorHolder)},
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/gandiva/function_registry_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
kResultNullIfNull, "gdv_fn_like_utf8_utf8",
NativeFunction::kNeedsFunctionHolder),

NativeFunction("regexp_like", {"regexp_matches"},
DataTypeVector{utf8(), utf8()}, boolean(),
kResultNullIfNull, "gdv_fn_like_utf8_utf8",
NativeFunction::kNeedsFunctionHolder),

NativeFunction("ltrim", {}, DataTypeVector{utf8(), utf8()}, utf8(),
kResultNullIfNull, "ltrim_utf8_utf8", NativeFunction::kNeedsContext),

Expand Down
19 changes: 16 additions & 3 deletions cpp/src/gandiva/like_holder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,20 +81,33 @@ Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* h
Status::Invalid(
"'like' function requires a string literal as the second parameter"));

// Check whether the pattern should be compiled directly as a regular expression.
auto function_name = node.descriptor()->name();
if (function_name == "regexp_matches" || function_name == "regexp_like") {
return Make(arrow::util::get<std::string>(literal->holder()), holder, true);
}
return Make(arrow::util::get<std::string>(literal->holder()), holder);
}

/// \brief Build a LikeHolder from a pattern written in SQL LIKE syntax.
///
/// The SQL pattern is first converted with RegexUtil::SqlLikePatternToPcre and
/// the converted pattern is then handed to the LikeHolder constructor.
///
/// \param[in] sql_pattern pattern in SQL LIKE syntax
/// \param[out] holder receives the newly built holder; only written on success
/// \return Status::OK() on success; the non-OK status from the pattern
///         conversion if that step fails; Status::Invalid if the holder's
///         regex_ reports that it is not ok.
Status LikeHolder::Make(const std::string& sql_pattern,
                        std::shared_ptr<LikeHolder>* holder) {
  std::string pcre_pattern;
  ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));

  // Declare and construct the holder exactly once. The previous text declared
  // `lholder` twice in the same scope and built the holder twice.
  // `new` is kept (as in the original) because the accessibility of the
  // LikeHolder constructor is not visible from here.
  std::shared_ptr<LikeHolder> lholder(new LikeHolder(pcre_pattern));
  ARROW_RETURN_IF(!lholder->regex_.ok(),
                  Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed"));

  *holder = lholder;
  return Status::OK();
}

/// \brief Build a LikeHolder from a pattern that is either a ready-made regular
/// expression or a SQL LIKE pattern.
///
/// \param[in] pattern the pattern text
/// \param[out] holder receives the newly built holder; only written on success
/// \param[in] is_regex when true, `pattern` is passed to the LikeHolder
///            constructor unchanged; when false, `pattern` is treated as a SQL
///            LIKE pattern and handled by the two-argument overload
/// \return Status::OK() on success; Status::Invalid if the holder's regex_
///         reports that it is not ok (or whatever the two-argument overload
///         returns when `is_regex` is false).
Status LikeHolder::Make(const std::string& pattern,
                        std::shared_ptr<LikeHolder>* holder, bool is_regex) {
  // Honour the flag: previously `is_regex` was ignored, so passing false still
  // skipped the SQL LIKE translation.
  if (!is_regex) {
    return Make(pattern, holder);
  }

  std::shared_ptr<LikeHolder> lholder(new LikeHolder(pattern));
  ARROW_RETURN_IF(!lholder->regex_.ok(),
                  Status::Invalid("Building RE2 pattern '", pattern, "' failed"));

  *holder = lholder;
  return Status::OK();
}
} // namespace gandiva
3 changes: 3 additions & 0 deletions cpp/src/gandiva/like_holder.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {

// Build a holder from a pattern in SQL LIKE syntax. The pattern is converted
// with RegexUtil::SqlLikePatternToPcre before the holder is constructed; on
// success the new holder is stored in *holder.
static Status Make(const std::string& sql_pattern, std::shared_ptr<LikeHolder>* holder);

// Build a holder for a pattern that is already a regular expression: the
// pattern is given to the LikeHolder constructor unchanged instead of being
// translated from SQL LIKE syntax. Used with is_regex = true for the
// "regexp_like" / "regexp_matches" functions. On success the new holder is
// stored in *holder; Status::Invalid is returned if the regex is not ok.
static Status Make(
const std::string& pattern, std::shared_ptr<LikeHolder>* holder, bool is_regex);

// Try and optimise a function node with a "like" pattern.
static const FunctionNode TryOptimize(const FunctionNode& node);

Expand Down
15 changes: 15 additions & 0 deletions cpp/src/gandiva/like_holder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,21 @@ TEST_F(TestLikeHolder, TestMatchOne) {
EXPECT_FALSE(like("dabc"));
}

// Checks that a pattern passed with is_regex = true is used as a regular
// expression: "ab." accepts "ab" followed by exactly one more character and
// must match the whole input (no shorter, longer, or offset matches).
TEST_F(TestLikeHolder, TestMatchOneRegex) {
  std::shared_ptr<LikeHolder> like_holder;

  auto status = LikeHolder::Make("ab.", &like_holder, true);
  // Fatal assertion: like_holder is dereferenced right below, so the test must
  // stop here if Make failed instead of dereferencing a null pointer.
  ASSERT_TRUE(status.ok()) << status.message();

  auto& like = *like_holder;
  EXPECT_TRUE(like("abc"));
  EXPECT_TRUE(like("abd"));

  EXPECT_FALSE(like("a"));
  EXPECT_FALSE(like("abcd"));
  EXPECT_FALSE(like("dabc"));
}

TEST_F(TestLikeHolder, TestPcreSpecial) {
std::shared_ptr<LikeHolder> like_holder;

Expand Down
41 changes: 41 additions & 0 deletions cpp/src/gandiva/tests/utf8_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,47 @@ TEST_F(TestUtf8, TestLike) {
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}

// End-to-end check of the "regexp_like" function through a Projector: the
// string literal is used as a regular expression and evaluated against a
// utf8 column, producing a boolean column.
TEST_F(TestUtf8, TestRegexpLike) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  // regexp_like(a, literal(s))
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto literal_s = TreeExprBuilder::MakeStringLiteral(".*spark.*");
  auto regexp_like_node =
      TreeExprBuilder::MakeFunction("regexp_like", {node_a, literal_s}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(regexp_like_node, res);

  // Build a projector for the expressions.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  // Fatal assertion: projector is dereferenced below, so stop if the build failed.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array_a = MakeArrowArrayUtf8({"park", "sparkle", "bright spark and fire", "spark"},
                                    {true, true, true, true});

  // expected output: only "park" lacks the substring "spark"
  auto exp = MakeArrowArrayBool({false, true, true, true}, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal assertion: outputs.at(0) below would throw if nothing was produced.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}

TEST_F(TestUtf8, TestBeginsEnds) {
// schema for input fields
auto field_a = field("a", utf8());
Expand Down