diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index 3d02e335787d8c..21583c2a9fdecd 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -383,31 +383,39 @@ Status insert_int_value(const rapidjson::Value& col, PrimitiveType type, return Status::OK(); } - if (pure_doc_value && col.IsArray() && !col.Empty()) { - RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type); - T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64()); - col_ptr->insert_data(const_cast(reinterpret_cast(&value)), 0); + auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status { + StringParser::ParseResult result; + std::string val = col_value.GetString(); + // ES allows inserting numbers and characters containing decimals in numeric types. + // To parse these numbers in Doris, we remove the decimals here. + size_t pos = val.find('.'); + if (pos != std::string::npos) { + val = val.substr(0, pos); + } + size_t len = val.length(); + T v = StringParser::string_to_int(val.c_str(), len, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type); + + col_ptr->insert_data(const_cast(reinterpret_cast(&v)), 0); return Status::OK(); + }; + + if (pure_doc_value && col.IsArray() && !col.Empty()) { + if (col[0].IsNumber()) { + RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type); + T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64()); + col_ptr->insert_data(const_cast(reinterpret_cast(&value)), 0); + return Status::OK(); + } else { + RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type); + return parse_and_insert_data(col[0]); + } } RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - - StringParser::ParseResult result; - std::string val = col.GetString(); - // ES allows inserting numbers and characters containing decimals in numeric types. - // To parse these numbers in Doris, we remove the decimals here. 
- size_t pos = val.find("."); - if (pos != std::string::npos) { - val = val.substr(0, pos); - } - size_t len = val.length(); - T v = StringParser::string_to_int(val.c_str(), len, &result); - RETURN_ERROR_IF_PARSING_FAILED(result, col, type); - - col_ptr->insert_data(const_cast(reinterpret_cast(&v)), 0); - - return Status::OK(); + return parse_and_insert_data(col); } ScrollParser::ScrollParser(bool doc_value_mode) : _size(0), _line_index(0) {} diff --git a/regression-test/data/external_table_p0/es/test_es_query.out b/regression-test/data/external_table_p0/es/test_es_query.out index ba0c87245a10da..e820e16553c8e1 100644 --- a/regression-test/data/external_table_p0/es/test_es_query.out +++ b/regression-test/data/external_table_p0/es/test_es_query.out @@ -54,6 +54,13 @@ text3_4*5 text3_4*5 text_ignore_above_10 +-- !sql11 -- +2022-08-08T12:10:10 +2022-08-09T12:10:10 +2022-08-10T12:10:10 +2022-08-11T12:10:10 +2022-08-11T12:10:10 + -- !sql20 -- ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [-1, 0, 1, 2] [0, 1, 2, 3] ["d", "e", "f"] [128, 129, -129, -130] ["192.168.0.1", "127.0.0.1"] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 ["2020-01-01", "2020-01-02"] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ["a", "b", "c"] [{"name":"Andy","age":18},{"name":"Tim","age":28}] 2022-08-08T12:10:10 2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] @@ -81,6 +88,13 @@ text_ignore_above_10 [{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White" [{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White" +-- !sql25 -- +2022-08-08T12:10:10 +2022-08-09T12:10:10 +2022-08-10T12:10:10 +2022-08-11T12:10:10 +2022-08-11T12:10:10 + -- !sql_5_02 -- [1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 
12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10 @@ -182,6 +196,20 @@ text2 text3_4*5 text_ignore_above_10 +-- !sql_5_25 -- +2022-08-08T12:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 + +-- !sql_5_26 -- +2022-08-08T12:10:10 +2022-08-08T12:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 + -- !sql_6_02 -- [1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10 @@ -283,6 +311,20 @@ text2 text3_4*5 text_ignore_above_10 +-- !sql_6_25 -- +2022-08-08T12:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 + +-- !sql_6_26 -- +2022-08-08T12:10:10 +2022-08-08T12:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 + -- !sql_7_02 -- [1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] debug \N This string can be 
quite lengthy string1 2022-08-08T20:10:10 text#1 3.14 2022-08-08T00:00 2022-08-08T12:10:10 1659931810000 2022-08-08T12:10:10 2022-08-08T20:10:10 12345 @@ -423,6 +465,23 @@ text3_4*5 text3_4*5 text_ignore_above_10 +-- !sql_7_32 -- +1659931810000 +1660018210000 +1660104610000 +1660191010000 +1660191010000 + +-- !sql_7_33 -- +1659931810000 +1659931810000 +1660018210000 +1660018210000 +1660104610000 +1660104610000 +1660191010000 +1660191010000 + -- !sql_7_50 -- value1 value2 @@ -563,6 +622,23 @@ text3_4*5 text3_4*5 text_ignore_above_10 +-- !sql_8_30 -- +1659931810000 +1660018210000 +1660104610000 +1660191010000 +1660191010000 + +-- !sql_8_31 -- +1659931810000 +1659931810000 +1660018210000 +1660018210000 +1660104610000 +1660104610000 +1660191010000 +1660191010000 + -- !sql01 -- ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [-1, 0, 1, 2] [0, 1, 2, 3] ["d", "e", "f"] [128, 129, -129, -130] ["192.168.0.1", "127.0.0.1"] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 ["2020-01-01", "2020-01-02"] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ["a", "b", "c"] [{"name":"Andy","age":18},{"name":"Tim","age":28}] 2022-08-08T12:10:10 2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] \N [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] @@ -618,6 +694,13 @@ text3_4*5 text3_4*5 text_ignore_above_10 +-- !sql11 -- +2022-08-08T12:10:10 +2022-08-09T12:10:10 +2022-08-10T12:10:10 +2022-08-11T12:10:10 +2022-08-11T12:10:10 + -- !sql20 -- ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [-1, 0, 1, 2] [0, 1, 2, 3] ["d", "e", "f"] [128, 129, -129, -130] ["192.168.0.1", "127.0.0.1"] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 ["2020-01-01", "2020-01-02"] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ["a", "b", "c"] [{"name":"Andy","age":18},{"name":"Tim","age":28}] 2022-08-08T12:10:10 2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] 
[{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] @@ -645,6 +728,13 @@ text_ignore_above_10 [{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White" [{"name":"Andy","age":18},{"name":"Tim","age":28}] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] "Andy" "White" +-- !sql25 -- +2022-08-08T12:10:10 +2022-08-09T12:10:10 +2022-08-10T12:10:10 +2022-08-11T12:10:10 +2022-08-11T12:10:10 + -- !sql_5_02 -- [1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10 @@ -746,6 +836,20 @@ text2 text3_4*5 text_ignore_above_10 +-- !sql_5_25 -- +2022-08-08T12:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 + +-- !sql_5_26 -- +2022-08-08T12:10:10 +2022-08-08T12:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 + -- !sql_6_02 -- [1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] \N string1 text#1 3.14 2022-08-08T00:00 12345 2022-08-08T20:10:10 @@ -847,6 +951,20 @@ text2 text3_4*5 text_ignore_above_10 +-- !sql_6_25 -- +2022-08-08T12:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 + +-- !sql_6_26 -- 
+2022-08-08T12:10:10 +2022-08-08T12:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 +2022-08-08T20:10:10 + -- !sql_7_02 -- [1, 0, 1, 1] [1, -2, -3, 4] ["2020-01-01", "2020-01-02"] ["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] [32768, 32769, -32769, -32770] ["192.168.0.1", "127.0.0.1"] ["a", "b", "c"] [-1, 0, 1, 2] [{"name":"Andy","age":18},{"name":"Tim","age":28}] [1, 2, 3, 4] [128, 129, -129, -130] ["d", "e", "f"] [0, 1, 2, 3] [{"last":"Smith","first":"John"},{"last":"White","first":"Alice"}] debug \N This string can be quite lengthy string1 2022-08-08T20:10:10 text#1 3.14 2022-08-08T00:00 2022-08-08T12:10:10 1659931810000 2022-08-08T12:10:10 2022-08-08T20:10:10 12345 @@ -987,6 +1105,23 @@ text3_4*5 text3_4*5 text_ignore_above_10 +-- !sql_7_32 -- +1659931810000 +1660018210000 +1660104610000 +1660191010000 +1660191010000 + +-- !sql_7_33 -- +1659931810000 +1659931810000 +1660018210000 +1660018210000 +1660104610000 +1660104610000 +1660191010000 +1660191010000 + -- !sql_7_50 -- value1 value2 @@ -1127,3 +1262,20 @@ text3_4*5 text3_4*5 text_ignore_above_10 +-- !sql_8_30 -- +1659931810000 +1660018210000 +1660104610000 +1660191010000 +1660191010000 + +-- !sql_8_31 -- +1659931810000 +1659931810000 +1660018210000 +1660018210000 +1660104610000 +1660104610000 +1660191010000 +1660191010000 + diff --git a/regression-test/suites/external_table_p0/es/test_es_query.groovy b/regression-test/suites/external_table_p0/es/test_es_query.groovy index 55547e4eb25778..af1f270fd52c8a 100644 --- a/regression-test/suites/external_table_p0/es/test_es_query.groovy +++ b/regression-test/suites/external_table_p0/es/test_es_query.groovy @@ -177,12 +177,14 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { order_qt_sql08 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test_v1;""" order_qt_sql09 """select test1 from test_v1;""" order_qt_sql10 
"""select test2 from test_v1;""" + order_qt_sql11 """select test6 from test_v1;""" order_qt_sql20 """select * from test_v2 where test2='text#1'""" order_qt_sql21 """select * from test_v2 where esquery(test2, '{"match":{"test2":"text#1"}}')""" order_qt_sql22 """select test4,test5,test6,test7,test8 from test_v2 order by test8""" order_qt_sql23 """select * from test_v2 where esquery(c_long, '{"term":{"c_long":"-1"}}');""" order_qt_sql24 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test_v2;""" + order_qt_sql25 """select test6 from test_v2;""" sql """switch test_es_query_es5""" order_qt_sql_5_02 """select * from test1 where test2='text#1'""" @@ -208,6 +210,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { order_qt_sql_5_22 """select test6, substring(test6, 1, 13) from test2 where substring(test6, 1, 13) = '2022-08-08 12' limit 4;""" order_qt_sql_5_23 """select test1 from test1;""" order_qt_sql_5_24 """select test2 from test1;""" + order_qt_sql_5_25 """select test6 from test1;""" + order_qt_sql_5_26 """select test6 from test2;""" try { sql """select * from composite_type_array;""" fail("Should not reach here") @@ -241,6 +245,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { order_qt_sql_6_22 """select test6, substring(test6, 1, 13) from test2 where substring(test6, 1, 13) = '2022-08-08 12' limit 4;""" order_qt_sql_6_23 """select test1 from test1;""" order_qt_sql_6_24 """select test2 from test1;""" + order_qt_sql_6_25 """select test6 from test1;""" + order_qt_sql_6_26 """select test6 from test2;""" try { sql """select * from composite_type_array;""" fail("Should not reach here") @@ -299,6 +305,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { order_qt_sql_7_29 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-11' limit 2;""" order_qt_sql_7_30 """select test1 from test1;""" 
order_qt_sql_7_31 """select test2 from test1;""" + order_qt_sql_7_32 """select test6 from test1;""" + order_qt_sql_7_33 """select test6 from test2;""" try { sql """select * from composite_type_array;""" fail("Should not reach here") @@ -357,6 +365,8 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { order_qt_sql_8_27 """select test7,substring(test7, 1, 10) from test2 where substring(test7, 1, 10)='2022-08-11' limit 2;""" order_qt_sql_8_28 """select test1 from test1;""" order_qt_sql_8_29 """select test2 from test1;""" + order_qt_sql_8_30 """select test6 from test1;""" + order_qt_sql_8_31 """select test6 from test2;""" try { sql """select * from composite_type_array;""" fail("Should not reach here")