From 71d60f54892b6a522830b4895945a23ce6a5ee86 Mon Sep 17 00:00:00 2001 From: qiye Date: Fri, 26 Apr 2024 19:46:49 +0800 Subject: [PATCH] [fix](ES catalog)Make col != '' behavior consistent with SQL (#34151) In SQL syntax, `col != ''` equals `col.length() > 0`. It means that this column must exist in ES doc fields and its content is not empty. In this PR, we make a special translation for this binary predicate to keep the behavior of both consistent. --------- Co-authored-by: Luennng --- .../doris/external/elasticsearch/QueryBuilders.java | 10 ++++++++++ .../external/elasticsearch/QueryBuildersTest.java | 9 +++++++++ .../data/external_table_p0/es/test_es_query.out | 9 +-------- .../suites/external_table_p0/es/test_es_query.groovy | 2 +- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/QueryBuilders.java b/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/QueryBuilders.java index 350fd3857ea634..0205aba1287279 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/QueryBuilders.java +++ b/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/QueryBuilders.java @@ -40,6 +40,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import lombok.Builder; import lombok.Data; +import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.joda.time.format.DateTimeFormat; @@ -137,6 +138,15 @@ private static QueryBuilder parseBinaryPredicate(Expr expr, TExprOpcode opCode, case EQ_FOR_NULL: return QueryBuilders.termQuery(column, value); case NE: + // col != '' means col.length() > 0 in SQL syntax. + // The `NULL` value should not present in results. + // It equals + // '{"bool":{"must":{"bool":{"must_not":{"term":{"col":""}},"must":{"exists":{"field":"col"}}}}}}' + // in Elasticsearch + if (value instanceof String && StringUtils.isEmpty((String) value)) { + return QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(column, value)) + .must(QueryBuilders.existsQuery(column)); + } return QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(column, value)); case GE: return QueryBuilders.rangeQuery(column).gte(value); diff --git a/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/QueryBuildersTest.java b/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/QueryBuildersTest.java index d0cc111bef6fe1..184bf97c505093 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/QueryBuildersTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/QueryBuildersTest.java @@ -85,6 +85,15 @@ public void testBinaryPredicateConvertEsDsl() { Assertions.assertEquals("{\"term\":{\"k2\":\"2023-02-19T22:00:00.000+08:00\"}}", QueryBuilders.toEsDsl(dateTimeEqExpr, new ArrayList<>(), new HashMap<>(), BuilderOptions.builder().needCompatDateFields(Lists.newArrayList("k2")).build()).toJson()); + SlotRef k3 = new SlotRef(null, "k3"); + Expr stringLiteral = new StringLiteral(""); + Expr stringNeExpr = new BinaryPredicate(Operator.NE, k3, stringLiteral); + Assertions.assertEquals("{\"bool\":{\"must\":{\"exists\":{\"field\":\"k3\"}},\"must_not\":{\"term\":{\"k3\":\"\"}}}}", + QueryBuilders.toEsDsl(stringNeExpr).toJson()); + stringLiteral = new StringLiteral("message"); + stringNeExpr = new BinaryPredicate(Operator.NE, k3, stringLiteral); + Assertions.assertEquals("{\"bool\":{\"must_not\":{\"term\":{\"k3\":\"message\"}}}}", + QueryBuilders.toEsDsl(stringNeExpr).toJson()); } @Test diff --git a/regression-test/data/external_table_p0/es/test_es_query.out b/regression-test/data/external_table_p0/es/test_es_query.out index 78556a3cd46b5d..2e98ee6a174fa6 100644 --- a/regression-test/data/external_table_p0/es/test_es_query.out +++ b/regression-test/data/external_table_p0/es/test_es_query.out @@ -12,8 +12,6 @@ 2022-08-08 2022-08-11T12:10:10 2022-08-11T12:10:10 2022-08-11T12:10:10 2022-08-11T11:10:10 -- !sql04 -- -\N -\N I'm not null or empty -- !sql05 -- @@ -91,7 +89,6 @@ true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01T12:00 a d 192.168.0. 2022-08-08T20:10:10 -- !sql_6_16 -- -\N I'm not null or empty -- !sql_6_17 -- @@ -168,8 +165,6 @@ value1 value2 2022-08-08T20:10:10 -- !sql_7_19 -- -\N -\N I'm not null or empty -- !sql_7_20 -- @@ -188,7 +183,7 @@ I'm not null or empty [1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] ["{"name":"Andy","age":18}", "{"name":"Tim","age":28}"] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] \N I'm not null or empty \N string3 2022-08-09T00:40:10 text3_4*5 5.0 2022-08-08T00:00 2022-08-10T12:10:10 1660104610000 2022-08-10T12:10:10 2022-08-10T20:10:10 3333 [1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] ["{"name":"Andy","age":18}", "{"name":"Tim","age":28}"] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] debug \N This string can be quite lengthy string1 2022-08-08T20:10:10 text#1 3.14 2022-08-08T00:00 2022-08-08T12:10:10 1659931810000 2022-08-08T12:10:10 2022-08-08T20:10:10 12345 --- !sql_7_19 -- +-- !sql_7_24 -- value1 value2 -- !sql_8_01 -- @@ -255,8 +250,6 @@ value1 value2 2022-08-08T20:10:10 -- !sql_8_17 -- -\N -\N I'm not null or empty -- !sql_8_18 -- diff --git a/regression-test/suites/external_table_p0/es/test_es_query.groovy b/regression-test/suites/external_table_p0/es/test_es_query.groovy index 9acf67891e52cf..f2af00d6fe66b9 100644 --- a/regression-test/suites/external_table_p0/es/test_es_query.groovy +++ b/regression-test/suites/external_table_p0/es/test_es_query.groovy @@ -242,7 +242,7 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { } assertTrue(containeHide7) - order_qt_sql_7_19 """select * from test3_20231005""" + order_qt_sql_7_24 """select * from test3_20231005""" sql """switch test_es_query_es8""" order_qt_sql_8_01 """select * from test1 where test2='text#1'"""