diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index f0b73c0323a34d..720bd50215ab71 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -17,6 +17,7 @@ #include "exec/es/es_scroll_parser.h" +#include #include #include @@ -138,14 +139,6 @@ static const std::string INVALID_NULL_VALUE = return Status::RuntimeError(ss.str()); \ } while (false) -#define PARSE_DATE(dt_val, col, type, is_date_str) \ - if ((is_date_str && \ - !dt_val.from_date_str(static_cast(col.GetString()).c_str(), \ - col.GetStringLength())) || \ - (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00"))) { \ - RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); \ - } - template static Status get_int_value(const rapidjson::Value& col, PrimitiveType type, void* slot, bool pure_doc_value) { @@ -154,7 +147,7 @@ static Status get_int_value(const rapidjson::Value& col, PrimitiveType type, voi return Status::OK(); } - if (pure_doc_value && col.IsArray()) { + if (pure_doc_value && col.IsArray() && !col.Empty()) { RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type); *reinterpret_cast(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64()); return Status::OK(); @@ -184,8 +177,54 @@ static Status get_date_value_int(const rapidjson::Value& col, PrimitiveType type RT* slot) { constexpr bool is_datetime_v1 = std::is_same_v; T dt_val; - PARSE_DATE(dt_val, col, type, is_date_str) + if (is_date_str) { + const std::string str_date = col.GetString(); + int str_length = col.GetStringLength(); + bool success = false; + // YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+08:00 or 2022-08-08T12:10:10.000Z + if (str_length > 19) { + std::chrono::system_clock::time_point tp; + const bool ok = + cctz::parse("%Y-%m-%dT%H:%M:%E*S%Ez", str_date, cctz::utc_time_zone(), &tp); + if (ok) { + success = dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), + cctz::local_time_zone()); + } + } else if (str_length == 19) { + // YYYY-MM-DDTHH:MM:SS + if (*(str_date.c_str() + 10) == 'T') { + std::chrono::system_clock::time_point tp; + const bool ok = + cctz::parse("%Y-%m-%dT%H:%M:%S", str_date, cctz::utc_time_zone(), &tp); + if (ok) { + success = dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), + cctz::local_time_zone()); + } + } else { + // YYYY-MM-DD HH:MM:SS + success = dt_val.from_date_str(str_date.c_str(), str_length); + } + } else if (str_length == 13) { + // string long like "1677895728000" + int64_t time_long = std::atol(str_date.c_str()); + if (time_long > 0) { + success = dt_val.from_unixtime(time_long / 1000, cctz::local_time_zone()); + } + } else { + // YYYY-MM-DD or others + success = dt_val.from_date_str(str_date.c_str(), str_length); + } + + if (!success) { + RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); + } + + } else { + if (!dt_val.from_unixtime(col.GetInt64() / 1000, cctz::local_time_zone())) { + RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); + } + } if constexpr (is_datetime_v1) { if (type == TYPE_DATE) { dt_val.cast_to_date(); @@ -207,7 +246,7 @@ static Status get_date_int(const rapidjson::Value& col, PrimitiveType type, bool // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds return get_date_value_int(col, type, false, slot); - } else if (col.IsArray() && pure_doc_value) { + } else if (col.IsArray() && pure_doc_value && !col.Empty()) { // this would happened just only when `enable_docvalue_scan = true` // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose // a standard date-format for date field as `2020-06-16T00:00:00.000Z` @@ -243,7 +282,7 @@ static Status get_float_value(const rapidjson::Value& col, PrimitiveType type, v return Status::OK(); } - if (pure_doc_value && col.IsArray()) { + if (pure_doc_value && col.IsArray() && !col.Empty()) { *reinterpret_cast(slot) = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble()); return Status::OK(); } @@ -271,7 +310,7 @@ static Status insert_float_value(const rapidjson::Value& col, PrimitiveType type return Status::OK(); } - if (pure_doc_value && col.IsArray() && nullable) { + if (pure_doc_value && col.IsArray() && !col.Empty() && nullable) { T value = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble()); col_ptr->insert_data(const_cast(reinterpret_cast(&value)), 0); return Status::OK(); @@ -300,7 +339,7 @@ static Status insert_int_value(const rapidjson::Value& col, PrimitiveType type, return Status::OK(); } - if (pure_doc_value && col.IsArray()) { + if (pure_doc_value && col.IsArray() && !col.Empty()) { RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type); T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64()); col_ptr->insert_data(const_cast(reinterpret_cast(&value)), 0); @@ -438,7 +477,9 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, // this may be a tricky, but we can workaround this issue std::string val; if (pure_doc_value) { - if (!col[0].IsString()) { + if (col.Empty()) { + break; + } else if (!col[0].IsString()) { val = json_value_to_string(col[0]); } else { val = col[0].GetString(); @@ -507,11 +548,11 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, } bool is_nested_str = false; - if (pure_doc_value && col.IsArray() && col[0].IsBool()) { + if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) { int8_t val = col[0].GetBool(); col_ptr->insert_data(const_cast(reinterpret_cast(&val)), 0); break; - } else if (pure_doc_value && col.IsArray() && col[0].IsString()) { + } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) { is_nested_str = true; } else if (pure_doc_value && col.IsArray()) { return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); @@ -537,7 +578,9 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, } else { std::string val; if (pure_doc_value) { - if (!col[0].IsString()) { + if (col.Empty()) { + break; + } else if (!col[0].IsString()) { val = json_value_to_string(col[0]); } else { val = col[0].GetString(); @@ -580,19 +623,11 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, case TYPE_VARCHAR: case TYPE_STRING: { std::string val; - if (pure_doc_value) { - if (!sub_col[0].IsString()) { - val = json_value_to_string(sub_col[0]); - } else { - val = sub_col[0].GetString(); - } + RETURN_ERROR_IF_COL_IS_ARRAY(sub_col, sub_type); + if (!sub_col.IsString()) { + val = json_value_to_string(sub_col); } else { - RETURN_ERROR_IF_COL_IS_ARRAY(sub_col, type); - if (!sub_col.IsString()) { - val = json_value_to_string(sub_col); - } else { - val = sub_col.GetString(); - } + val = sub_col.GetString(); } array.push_back(val); break; @@ -656,10 +691,12 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, } bool is_nested_str = false; - if (pure_doc_value && sub_col.IsArray() && sub_col[0].IsBool()) { + if (pure_doc_value && sub_col.IsArray() && !sub_col.Empty() && + sub_col[0].IsBool()) { array.push_back(sub_col[0].GetBool()); break; - } else if (pure_doc_value && sub_col.IsArray() && sub_col[0].IsString()) { + } else if (pure_doc_value && sub_col.IsArray() && !sub_col.Empty() && + sub_col[0].IsString()) { is_nested_str = true; } else if (pure_doc_value && sub_col.IsArray()) { return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); diff --git a/be/src/runtime/thread_context.cpp b/be/src/runtime/thread_context.cpp index 371067279164bd..6b422aca6fdf2a 100644 --- a/be/src/runtime/thread_context.cpp +++ b/be/src/runtime/thread_context.cpp @@ -59,7 +59,7 @@ AttachTask::~AttachTask() { SwitchThreadMemTrackerLimiter::SwitchThreadMemTrackerLimiter( const std::shared_ptr& mem_tracker) { - SwitchBthreadLocal::switch_to_bthread_local(); + SwitchBthreadLocal::switch_to_bthread_local(); _old_mem_tracker = thread_context()->thread_mem_tracker_mgr->limiter_mem_tracker(); thread_context()->thread_mem_tracker_mgr->attach_limiter_tracker(mem_tracker, TUniqueId()); } diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1.json index 12bcbb0d5c8072..030246786820ed 100755 --- a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1.json +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1.json @@ -6,6 +6,7 @@ "test5": "2022-08-08 12:10:10", "test6": 1659931810000, "test7": 1659931810000, + "test8": "2022-08-08T12:10:10Z", "c_bool": [true, false, true, true], "c_byte": [1, -2, -3, 4], "c_short": [128, 129, -129, -130], diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2.json index 3b9ebdc6f2f369..c6f7d3e2f6ada6 100755 --- a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2.json +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2.json @@ -6,6 +6,7 @@ "test5": "2022-08-09 12:10:10", "test6": 1660018210000, "test7": "2022-08-09 12:10:10", + "test8": 1660018210000, "c_bool": [true, false, true, true], "c_byte": [1, -2, -3, 4], "c_short": [128, 129, -129, -130], diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data3.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data3.json index 9c10c2cf2a9ac7..73631d826185c7 100755 --- a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data3.json +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data3.json @@ -4,8 +4,9 @@ "test3": 5.0, "test4": "2022-08-08", "test5": "2022-08-10 12:10:10", - "test6": 1660018210000, - "test7": "2022-08-10 12:10:10", + "test6": 1660104610000, + "test7": 1660104610000, + "test8": "2022-08-10T12:10:10", "c_bool": [true, false, true, true], "c_byte": [1, -2, -3, 4], "c_short": [128, 129, -129, -130], diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data4.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data4.json new file mode 100755 index 00000000000000..954ca884d61567 --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data4.json @@ -0,0 +1,29 @@ +{ + "test1": "string4", + "test2": "text3_4*5", + "test3": 6.0, + "test4": "2022-08-08", + "test5": "2022-08-11 12:10:10", + "test6": 1660191010000, + "test7": "2022-08-11 12:10:10", + "test8": "2022-08-11T12:10:10+09:00", + "c_bool": [true, false, true, true], + "c_byte": [1, -2, -3, 4], + "c_short": [128, 129, -129, -130], + "c_integer": [32768, 32769, -32769, -32770], + "c_long": [-1, 0, 1, 2], + "c_unsigned_long": [0, 1, 2, 3], + "c_float": [1.0, 1.1, 1.2, 1.3], + "c_half_float": [1, 2, 3, 4], + "c_double": [1, 2, 3, 4], + "c_scaled_float": [1, 2, 3, 4], + "c_date": ["2020-01-01", "2020-01-02"], + "c_datetime": ["2020-01-01 12:00:00", "2020-01-02 13:01:01"], + "c_keyword": ["a", "b", "c"], + "c_text": ["d", "e", "f"], + "c_ip": ["192.168.0.1", "127.0.0.1"], + "c_person": [ + {"name": "Andy", "age": 18}, + {"name": "Tim", "age": 28} + ] +} \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh b/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh index 13947a064c0792..8ddb125e327ff2 100755 --- a/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh @@ -20,12 +20,13 @@ # create index test1 # shellcheck disable=SC2154 curl "http://${ES_6_HOST}:9200/test1" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es6_test1.json" -# create index test2 +# create index test2_20220808 curl "http://${ES_6_HOST}:9200/test2_20220808" -H "Content-Type:application/json" -X PUT -d '@/mnt/scripts/index/es6_test2.json' -# put data +# put data for test1 curl "http://${ES_6_HOST}:9200/test1/doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1_es6.json' curl "http://${ES_6_HOST}:9200/test1/doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2_es6.json' curl "http://${ES_6_HOST}:9200/test1/doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3_es6.json' +# put data for test2_20220808 curl "http://${ES_6_HOST}:9200/test2_20220808/doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1_es6.json' curl "http://${ES_6_HOST}:9200/test2_20220808/doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2_es6.json' curl "http://${ES_6_HOST}:9200/test2_20220808/doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3_es6.json' @@ -36,15 +37,19 @@ curl "http://${ES_6_HOST}:9200/test2_20220808/doc/_mapping" -H "Content-Type:app # es7 # create index test1 curl "http://${ES_7_HOST}:9200/test1" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_test1.json" -# create index test2 +# create index test2_20220808 curl "http://${ES_7_HOST}:9200/test2_20220808" -H "Content-Type:application/json" -X PUT -d '@/mnt/scripts/index/es7_test2.json' -# put data +# put data for tese1 curl "http://${ES_7_HOST}:9200/test1/_doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1.json' curl "http://${ES_7_HOST}:9200/test1/_doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2.json' curl "http://${ES_7_HOST}:9200/test1/_doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3.json' +curl "http://${ES_7_HOST}:9200/test1/_doc/4" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data4.json' +# put data for test2_20220808 curl "http://${ES_7_HOST}:9200/test2_20220808/_doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1.json' curl "http://${ES_7_HOST}:9200/test2_20220808/_doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2.json' curl "http://${ES_7_HOST}:9200/test2_20220808/_doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3.json' +curl "http://${ES_7_HOST}:9200/test2_20220808/_doc/4" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data4.json' + # put _meta for array curl "http://${ES_7_HOST}:9200/test1/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" curl "http://${ES_7_HOST}:9200/test2_20220808/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" @@ -52,15 +57,19 @@ curl "http://${ES_7_HOST}:9200/test2_20220808/_mapping" -H "Content-Type:applica # es8 # create index test1 curl "http://${ES_8_HOST}:9200/test1" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_test1.json" -# create index test2 +# create index test2_20220808 curl "http://${ES_8_HOST}:9200/test2_20220808" -H "Content-Type:application/json" -X PUT -d '@/mnt/scripts/index/es7_test2.json' -# put data +# put data for tese1 curl "http://${ES_8_HOST}:9200/test1/_doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1.json' curl "http://${ES_8_HOST}:9200/test1/_doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2.json' curl "http://${ES_8_HOST}:9200/test1/_doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3.json' +curl "http://${ES_8_HOST}:9200/test1/_doc/4" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data4.json' +# put data for test2_20220808 curl "http://${ES_8_HOST}:9200/test2_20220808/_doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1.json' curl "http://${ES_8_HOST}:9200/test2_20220808/_doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2.json' curl "http://${ES_8_HOST}:9200/test2_20220808/_doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3.json' +curl "http://${ES_8_HOST}:9200/test2_20220808/_doc/4" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data4.json' + # put _meta for array curl "http://${ES_8_HOST}:9200/test1/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" curl "http://${ES_8_HOST}:9200/test2_20220808/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test1.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test1.json index ebc4227a203b7b..4f49a1db8b4ad8 100755 --- a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test1.json +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test1.json @@ -35,6 +35,9 @@ "type": "date", "format": "yyyy-MM-dd HH:mm:ss || epoch_millis" }, + "test8": { + "type": "date" + }, "c_bool": { "type": "boolean" }, diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test2.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test2.json index 00143131d001a3..166331f5ccfdc1 100755 --- a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test2.json +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test2.json @@ -38,6 +38,9 @@ "type": "date", "format": "yyyy-MM-dd HH:mm:ss || epoch_millis" }, + "test8": { + "type": "date" + }, "c_bool": { "type": "boolean" }, diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/EsResource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/EsResource.java index 91cb4529ad73c7..84f51cb820b7d2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/EsResource.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/EsResource.java @@ -136,6 +136,15 @@ public static void valid(Map properties, boolean isAlter) throws } } + public static void fillUrlsWithSchema(String[] urls, boolean isSslEnabled) { + for (int i = 0; i < urls.length; i++) { + String seed = urls[i].trim(); + if (!seed.startsWith("http://") && !seed.startsWith("https://")) { + urls[i] = (isSslEnabled ? "https://" : "http://") + seed; + } + } + } + private Map processCompatibleProperties(Map props) { // Compatible with ES catalog properties Map properties = Maps.newHashMap(props); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/EsTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/EsTable.java index 5d771638577c67..b9dbc6ff645916 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/EsTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/EsTable.java @@ -151,8 +151,6 @@ public List needCompatDateFields() { private void validate(Map properties) throws DdlException { EsResource.valid(properties, false); - hosts = properties.get(EsResource.HOSTS).trim(); - seeds = hosts.split(","); if (properties.containsKey(EsResource.USER)) { userName = properties.get(EsResource.USER).trim(); } @@ -198,6 +196,12 @@ private void validate(Map properties) throws DdlException { maxDocValueFields = DEFAULT_MAX_DOCVALUE_FIELDS; } } + + hosts = properties.get(EsResource.HOSTS).trim(); + seeds = hosts.split(","); + // parse httpSslEnabled before use it here. + EsResource.fillUrlsWithSchema(seeds, httpSslEnabled); + tableContext.put("hosts", hosts); tableContext.put("userName", userName); tableContext.put("passwd", passwd); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/EsExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/EsExternalCatalog.java index 650e5374e5fb6d..52f9bc984fa031 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/EsExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/EsExternalCatalog.java @@ -70,7 +70,11 @@ private Map processCompatibleProperties(Map prop public String[] getNodes() { String hosts = catalogProperty.getOrDefault(EsResource.HOSTS, ""); - return hosts.trim().split(","); + String sslEnabled = + catalogProperty.getOrDefault(EsResource.HTTP_SSL_ENABLED, EsResource.HTTP_SSL_ENABLED_DEFAULT_VALUE); + String[] hostUrls = hosts.trim().split(","); + EsResource.fillUrlsWithSchema(hostUrls, Boolean.parseBoolean(sslEnabled)); + return hostUrls; } public String getUsername() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/EsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/EsUtil.java index ef26eaf0e43365..f2980133528e5b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/EsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/EsUtil.java @@ -303,12 +303,13 @@ private static Type parseEsDateType(Column column, ObjectNode field) { boolean bigIntFlag = false; for (String format : formats) { // pre-check format - if (!ALLOW_DATE_FORMATS.contains(format)) { + String trimFormat = format.trim(); + if (!ALLOW_DATE_FORMATS.contains(trimFormat)) { column.setComment( - "Elasticsearch type is date, format is " + format + " not support, use String type"); + "Elasticsearch type is date, format is " + trimFormat + " not support, use String type"); return ScalarType.createStringType(); } - switch (format) { + switch (trimFormat) { case "yyyy-MM-dd HH:mm:ss": dateTimeFlag = true; break; diff --git a/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/QueryBuilders.java b/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/QueryBuilders.java index a0a805747a53e0..8dc8d4abea9574 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/QueryBuilders.java +++ b/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/QueryBuilders.java @@ -224,6 +224,11 @@ public static QueryBuilder toEsDsl(Expr expr, List notPushDownList, Map NULL, `test6` datetime NULL, `test7` datetime NULL, + `test8` datetime NULL, `c_byte` array NULL, `c_bool` array NULL, `c_integer` array NULL @@ -94,7 +95,9 @@ suite("test_es_query", "p0") { "http_ssl_enabled"="false" ); """ - order_qt_sql52 """select * from test_v1 where test2='text#1'""" + order_qt_sql51 """select * from test_v1 where test2='text#1'""" + order_qt_sql52 """select * from test_v1 where esquery(test2, '{"match":{"test2":"text#1"}}')""" + order_qt_sql53 """select test4,test5,test6,test7,test8 from test_v1 order by test8""" sql """ CREATE TABLE `test_v2` ( @@ -118,6 +121,7 @@ suite("test_es_query", "p0") { `c_person` array NULL, `test6` datetimev2 NULL, `test7` datetimev2 NULL, + `test8` datetimev2 NULL, `c_byte` array NULL, `c_bool` array NULL, `c_integer` array NULL @@ -131,8 +135,9 @@ suite("test_es_query", "p0") { "http_ssl_enabled"="false" ); """ - order_qt_sql51 """select * from test_v2 where test2='text#1'""" - + order_qt_sql53 """select * from test_v2 where test2='text#1'""" + order_qt_sql54 """select * from test_v2 where esquery(test2, '{"match":{"test2":"text#1"}}')""" + order_qt_sql55 """select test4,test5,test6,test7,test8 from test_v2 order by test8""" sql """create catalog if not exists es6 with resource es6_resource;""" sql """create catalog if not exists es7 with resource es7_resource;""" @@ -140,22 +145,31 @@ suite("test_es_query", "p0") { sql """switch es6""" // order_qt_sql61 """show tables""" order_qt_sql62 """select * from test1 where test2='text#1'""" - order_qt_sql63 """select * from test2_20220808 where test4='2022-08-08'""" + order_qt_sql63 """select * from test2_20220808 where test4 >= '2022-08-08 00:00:00' and test4 < '2022-08-08 23:59:59'""" order_qt_sql64 """select * from test2_20220808 where substring(test2, 2) = 'ext2'""" order_qt_sql65 """select c_bool[1], c_byte[1], c_short[1], c_integer[1], c_long[1], c_unsigned_long[1], c_float[1], c_half_float[1], c_double[1], c_scaled_float[1], c_date[1], c_datetime[1], c_keyword[1], c_text[1], c_ip[1], c_person[1] from test1""" order_qt_sql66 """select c_bool[1], c_byte[1], c_short[1], c_integer[1], c_long[1], c_unsigned_long[1], c_float[1], c_half_float[1], c_double[1], c_scaled_float[1], c_date[1], c_datetime[1], c_keyword[1], c_text[1], c_ip[1], c_person[1] from test2_20220808""" + order_qt_sql67 """select * from test1 where esquery(test2, '{"match":{"test2":"text#1"}}')""" + order_qt_sql68 """select c_bool, c_byte, c_short, c_integer, c_long, c_unsigned_long, c_float, c_half_float, c_double, c_scaled_float, c_date, c_datetime, c_keyword, c_text, c_ip, c_person from test1""" + order_qt_sql69 """select c_bool, c_byte, c_short, c_integer, c_long, c_unsigned_long, c_float, c_half_float, c_double, c_scaled_float, c_date, c_datetime, c_keyword, c_text, c_ip, c_person from test2_20220808""" sql """switch es7""" // order_qt_sql71 """show tables""" order_qt_sql72 """select * from test1 where test2='text#1'""" - order_qt_sql73 """select * from test2_20220808 where test4='2022-08-08'""" + order_qt_sql73 """select * from test2_20220808 where test4 >= '2022-08-08 00:00:00' and test4 < '2022-08-08 23:59:59'""" order_qt_sql74 """select * from test2_20220808 where substring(test2, 2) = 'ext2'""" order_qt_sql75 """select c_bool[1], c_byte[1], c_short[1], c_integer[1], c_long[1], c_unsigned_long[1], c_float[1], c_half_float[1], c_double[1], c_scaled_float[1], c_date[1], c_datetime[1], c_keyword[1], c_text[1], c_ip[1], c_person[1] from test1""" order_qt_sql76 """select c_bool[1], c_byte[1], c_short[1], c_integer[1], c_long[1], c_unsigned_long[1], c_float[1], c_half_float[1], c_double[1], c_scaled_float[1], c_date[1], c_datetime[1], c_keyword[1], c_text[1], c_ip[1], c_person[1] from test2""" + order_qt_sql77 """select * from test1 where esquery(test2, '{"match":{"test2":"text#1"}}')""" + order_qt_sql78 """select c_bool, c_byte, c_short, c_integer, c_long, c_unsigned_long, c_float, c_half_float, c_double, c_scaled_float, c_date, c_datetime, c_keyword, c_text, c_ip, c_person from test1""" + order_qt_sql79 """select c_bool, c_byte, c_short, c_integer, c_long, c_unsigned_long, c_float, c_half_float, c_double, c_scaled_float, c_date, c_datetime, c_keyword, c_text, c_ip, c_person from test2""" sql """switch es8""" order_qt_sql81 """select * from test1 where test2='text#1'""" - order_qt_sql82 """select * from test2_20220808 where test4='2022-08-08'""" + order_qt_sql82 """select * from test2_20220808 where test4 >= '2022-08-08 00:00:00' and test4 < '2022-08-08 23:59:59'""" order_qt_sql83 """select c_bool[1], c_byte[1], c_short[1], c_integer[1], c_long[1], c_unsigned_long[1], c_float[1], c_half_float[1], c_double[1], c_scaled_float[1], c_date[1], c_datetime[1], c_keyword[1], c_text[1], c_ip[1], c_person[1] from test1""" order_qt_sql84 """select c_bool[1], c_byte[1], c_short[1], c_integer[1], c_long[1], c_unsigned_long[1], c_float[1], c_half_float[1], c_double[1], c_scaled_float[1], c_date[1], c_datetime[1], c_keyword[1], c_text[1], c_ip[1], c_person[1] from test2""" + order_qt_sql85 """select * from test1 where esquery(test2, '{"match":{"test2":"text#1"}}')""" + order_qt_sql86 """select c_bool, c_byte, c_short, c_integer, c_long, c_unsigned_long, c_float, c_half_float, c_double, c_scaled_float, c_date, c_datetime, c_keyword, c_text, c_ip, c_person from test1""" + order_qt_sql87 """select c_bool, c_byte, c_short, c_integer, c_long, c_unsigned_long, c_float, c_half_float, c_double, c_scaled_float, c_date, c_datetime, c_keyword, c_text, c_ip, c_person from test2""" sql """drop catalog if exists es6;""" diff --git a/regression-test/suites/es_p0/test_es_query_no_http_url.groovy b/regression-test/suites/es_p0/test_es_query_no_http_url.groovy new file mode 100644 index 00000000000000..26516266723a5b --- /dev/null +++ b/regression-test/suites/es_p0/test_es_query_no_http_url.groovy @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_es_query_no_http_url", "p0") { + + String enabled = context.config.otherConfigs.get("enableEsTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String es_6_port = context.config.otherConfigs.get("es_6_port") + String es_7_port = context.config.otherConfigs.get("es_7_port") + String es_8_port = context.config.otherConfigs.get("es_8_port") + + sql """drop catalog if exists es6;""" + sql """drop catalog if exists es7;""" + sql """drop catalog if exists es8;""" + sql """drop resource if exists es6_resource;""" + sql """drop resource if exists es7_resource;""" + sql """drop resource if exists es8_resource;""" + sql """drop table if exists test_v1;""" + sql """drop table if exists test_v2;""" + + // test old create-catalog syntax for compatibility + sql """ + create catalog es6 + properties ( + "type"="es", + "elasticsearch.hosts"="127.0.0.1:$es_6_port", + "elasticsearch.nodes_discovery"="false", + "elasticsearch.keyword_sniff"="true" + ); + """ + + // test new create catalog syntax + sql """create resource if not exists es7_resource properties( + "type"="es", + "hosts"="127.0.0.1:$es_7_port", + "nodes_discovery"="false", + "enable_keyword_sniff"="true" + ); + """ + + sql """create resource if not exists es8_resource properties( + "type"="es", + "hosts"="127.0.0.1:$es_8_port", + "nodes_discovery"="false", + "enable_keyword_sniff"="true" + ); + """ + + // test external table for datetime + sql """ + CREATE TABLE `test_v1` ( + `c_datetime` array NULL, + `c_long` array NULL, + `c_unsigned_long` array NULL, + `c_text` array NULL, + `c_short` array NULL, + `c_ip` array NULL, + `test1` text NULL, + `c_half_float` array NULL, + `test4` date NULL, + `test5` datetime NULL, + `test2` text NULL, + `c_date` array NULL, + `test3` double NULL, + `c_scaled_float` array NULL, + `c_float` array NULL, + `c_double` array NULL, + `c_keyword` array NULL, + `c_person` array NULL, + `test6` datetime NULL, + `test7` datetime NULL, + `test8` datetime NULL, + `c_byte` array NULL, + `c_bool` array NULL, + `c_integer` array NULL + ) ENGINE=ELASTICSEARCH + COMMENT 'ELASTICSEARCH' + PROPERTIES ( + "hosts" = "127.0.0.1:$es_7_port", + "index" = "test1", + "nodes_discovery"="false", + "enable_keyword_sniff"="true", + "http_ssl_enabled"="false" + ); + """ + order_qt_sql51 """select * from test_v1 where test2='text#1'""" + + sql """ + CREATE TABLE `test_v2` ( + `c_datetime` array NULL, + `c_long` array NULL, + `c_unsigned_long` array NULL, + `c_text` array NULL, + `c_short` array NULL, + `c_ip` array NULL, + `test1` text NULL, + `c_half_float` array NULL, + `test4` datev2 NULL, + `test5` datetimev2 NULL, + `test2` text NULL, + `c_date` array NULL, + `test3` double NULL, + `c_scaled_float` array NULL, + `c_float` array NULL, + `c_double` array NULL, + `c_keyword` array NULL, + `c_person` array NULL, + `test6` datetimev2 NULL, + `test7` datetimev2 NULL, + `test8` datetimev2 NULL, + `c_byte` array NULL, + `c_bool` array NULL, + `c_integer` array NULL + ) ENGINE=ELASTICSEARCH + COMMENT 'ELASTICSEARCH' + PROPERTIES ( + "hosts" = "127.0.0.1:$es_8_port", + "index" = "test1", + "nodes_discovery"="false", + "enable_keyword_sniff"="true", + "http_ssl_enabled"="false" + ); + """ + order_qt_sql52 """select * from test_v2 where test2='text#1'""" + + sql """create catalog if not exists es6 with resource es6_resource;""" + sql """create catalog if not exists es7 with resource es7_resource;""" + sql """create catalog if not exists es8 with resource es8_resource;""" + // es6 + sql """switch es6""" + order_qt_sql61 """select * from test1 where test2='text#1'""" + // es7 + sql """switch es7""" + order_qt_sql71 """select * from test1 where test2='text#1'""" + // es8 + sql """switch es8""" + order_qt_sql81 """select * from test1 where test2='text#1'""" + + sql """drop catalog if exists es6;""" + sql """drop catalog if exists es7;""" + sql """drop catalog if exists es8;""" + sql """drop resource if exists es6_resource;""" + sql """drop resource if exists es7_resource;""" + sql """drop resource if exists es8_resource;""" + } +}