From ad36a98b4438f138bfcc66cde8309b995e11130b Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Wed, 16 Jul 2025 11:58:05 +0800 Subject: [PATCH] [fix](json) incorrect results of json_contains (#53291) Fix the incorrect results and remove unused code. Related PR: #xxx Problem Summary: None - Test - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [ ] No. - [ ] Yes. - Does this need documentation? - [ ] No. - [ ] Yes. - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label --- be/src/util/jsonb_document.h | 12 +- be/src/vec/functions/function_json.cpp | 132 ------------------ .../jsonb_p0/test_jsonb_load_and_function.out | 10 +- ...est_jsonb_load_unique_key_and_function.out | 4 +- .../json_functions/test_json_function.out | 17 +++ .../json_functions/test_json_function.groovy | 19 +++ 6 files changed, 49 insertions(+), 145 deletions(-) diff --git a/be/src/util/jsonb_document.h b/be/src/util/jsonb_document.h index 7f50c4012baff8..6d269f6dd52ddd 100644 --- a/be/src/util/jsonb_document.h +++ b/be/src/util/jsonb_document.h @@ -1326,12 +1326,12 @@ inline bool JsonbValue::contains(JsonbValue* rhs) const { } case JsonbType::T_Object: { if (rhs->isObject()) { - auto* str_value1 = (ObjectVal*)this; - auto* str_value2 = (ObjectVal*)rhs; - for (int i = 0; i < str_value2->numElem(); ++i) { - JsonbKeyValue* key = str_value2->getJsonbKeyValue(i); - JsonbValue* value = str_value1->find(key->getKeyStr(), key->klen()); - if (key != nullptr && value != nullptr && !value->contains(key->value())) { + const auto* obj_value1 = (ObjectVal*)this; + const auto* obj_value2 = (ObjectVal*)rhs; + for (int i = 0; i < obj_value2->numElem(); ++i) { + JsonbKeyValue* key = obj_value2->getJsonbKeyValue(i); + JsonbValue* value = obj_value1->find(key->getKeyStr(), key->klen()); + if (value == nullptr || !value->contains(key->value())) { return false; } } diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp index ab1ff616f2b1e7..7ce4049249d943 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -1105,137 +1105,6 @@ class FunctionJsonValid : public IFunction { } }; -class FunctionJsonContains : public IFunction { -public: - static constexpr auto name = "json_contains"; - static FunctionPtr create() { return std::make_shared(); } - - String get_name() const override { return name; } - - size_t get_number_of_arguments() const override { return 3; } - - DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - return make_nullable(std::make_shared()); - } - - DataTypes get_variadic_argument_types_impl() const override { - return {std::make_shared(), std::make_shared(), - std::make_shared()}; - } - - bool use_default_implementation_for_nulls() const override { return false; } - - bool json_contains_object(const rapidjson::Value& target, - const rapidjson::Value& search_value) const { - if (!target.IsObject() || !search_value.IsObject()) { - return false; - } - - for (auto itr = search_value.MemberBegin(); itr != search_value.MemberEnd(); ++itr) { - if (!target.HasMember(itr->name) || !json_contains(target[itr->name], itr->value)) { - return false; - } - } - - return true; - } - - bool json_contains_array(const rapidjson::Value& target, - const rapidjson::Value& search_value) const { - if (!target.IsArray() || !search_value.IsArray()) { - return false; - } - - for (auto itr = search_value.Begin(); itr != search_value.End(); ++itr) { - bool found = false; - for (auto target_itr = target.Begin(); target_itr != target.End(); ++target_itr) { - if (json_contains(*target_itr, *itr)) { - found = true; - break; - } - } - if (!found) { - return false; - } - } - - return true; - } - - bool json_contains(const rapidjson::Value& target, const rapidjson::Value& search_value) const { - if (target == search_value) { - return true; - } - - if (target.IsObject() && search_value.IsObject()) { - return json_contains_object(target, search_value); - } - - if (target.IsArray() && search_value.IsArray()) { - return json_contains_array(target, search_value); - } - - return false; - } - - Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { - const IColumn& col_json = *(block.get_by_position(arguments[0]).column); - const IColumn& col_search = *(block.get_by_position(arguments[1]).column); - const IColumn& col_path = *(block.get_by_position(arguments[2]).column); - - auto null_map = ColumnUInt8::create(input_rows_count, 0); - - const ColumnString* col_json_string = check_and_get_column(col_json); - const ColumnString* col_search_string = check_and_get_column(col_search); - const ColumnString* col_path_string = check_and_get_column(col_path); - - if (!col_json_string || !col_search_string || !col_path_string) { - return Status::RuntimeError("Illegal column should be ColumnString"); - } - - auto col_to = ColumnVector::create(); - auto& vec_to = col_to->get_data(); - size_t size = col_json.size(); - vec_to.resize(size); - - for (size_t i = 0; i < input_rows_count; ++i) { - if (col_json.is_null_at(i) || col_search.is_null_at(i) || col_path.is_null_at(i)) { - null_map->get_data()[i] = 1; - vec_to[i] = 0; - continue; - } - - const auto& json_val = col_json_string->get_data_at(i); - const auto& search_val = col_search_string->get_data_at(i); - const auto& path_val = col_path_string->get_data_at(i); - - std::string_view json_string(json_val.data, json_val.size); - std::string_view search_string(search_val.data, search_val.size); - std::string_view path_string(path_val.data, path_val.size); - - rapidjson::Document document; - auto target_val = get_json_object(json_string, path_string, &document); - if (target_val == nullptr || target_val->IsNull()) { - vec_to[i] = 0; - } else { - rapidjson::Document search_doc; - search_doc.Parse(search_string.data(), search_string.size()); - if (json_contains(*target_val, search_doc)) { - vec_to[i] = 1; - } else { - vec_to[i] = 0; - } - } - } - - block.replace_by_position(result, - ColumnNullable::create(std::move(col_to), std::move(null_map))); - - return Status::OK(); - } -}; - class FunctionJsonUnquote : public IFunction { public: static constexpr auto name = "json_unquote"; @@ -1658,7 +1527,6 @@ void register_function_json(SimpleFunctionFactory& factory) { FunctionJsonNullable>>(); factory.register_function(); - factory.register_function(); factory.register_function>(); factory.register_function>(); diff --git a/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out b/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out index d7d7611931fdb3..a27fa14581f292 100644 --- a/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out +++ b/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out @@ -8528,7 +8528,7 @@ true 8 1152921504606846976 false 9 6.18 false 10 "abcd" false -11 {} true +11 {} false 12 {"k1":"v31","k2":300} true 13 [] false 14 [123,456] false @@ -8538,13 +8538,13 @@ true 18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} true 26 \N \N 27 {"k1":"v1","k2":200} false -28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} true +28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} false 29 12524337771678448270 false 30 -9223372036854775808 false 31 18446744073709551615 false -32 {"":"v1"} true -33 {"":1," ":"v1"} true -34 {"":1,"ab":"v1"," ":"v1"," ":2} true +32 {"":"v1"} false +33 {"":1," ":"v1"} false +34 {"":1,"ab":"v1"," ":"v1"," ":2} false -- !select_json_contains -- 1 \N \N diff --git a/regression-test/data/jsonb_p0/test_jsonb_load_unique_key_and_function.out b/regression-test/data/jsonb_p0/test_jsonb_load_unique_key_and_function.out index 427b6426d6816f..36d14661668d1f 100644 --- a/regression-test/data/jsonb_p0/test_jsonb_load_unique_key_and_function.out +++ b/regression-test/data/jsonb_p0/test_jsonb_load_unique_key_and_function.out @@ -6047,7 +6047,7 @@ true 8 1152921504606846976 false 9 6.18 false 10 "abcd" false -11 {} true +11 {} false 12 {"k1":"v31","k2":300} true 13 [] false 14 [123,456] false @@ -6057,7 +6057,7 @@ true 18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} true 26 \N \N 27 {"k1":"v1","k2":200} false -28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} true +28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} false -- !select_json_contains -- 1 \N \N diff --git a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out index c22acc171345a5..47da4ec2bbef1c 100644 --- a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out +++ b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out @@ -187,3 +187,20 @@ doris -- !sql -- \N +-- !json_contains1 -- +false + +-- !json_contains2 -- +true + +-- !json_contains3 -- +false + +-- !json_contains4 -- +true + +-- !json_contains5 -- +false + +-- !json_contains6 -- +\N diff --git a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy index fe4dfc55315b91..517db385cc2964 100644 --- a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy @@ -91,4 +91,23 @@ suite("test_json_function", "arrow_flight_sql") { qt_sql """select get_json_string('{"name\\k" : 123}', '\$.name\\k')""" qt_sql """select get_json_string('{"name\\k" : 123}', '\$.name\\\\k')""" qt_sql """select get_json_string('{"name\\k" : 123}', '\$.name\\\\\\k')""" + + qt_json_contains1 """ + SELECT JSON_CONTAINS('{"age": 30, "name": "John", "hobbies": ["reading", "swimming"]}', '{"invalid": "format"}'); + """ + qt_json_contains2 """ + SELECT JSON_CONTAINS('{"age": 25, "name": "Alice", "hobbies": ["painting", "music"]}', '{"age": 25}'); + """ + qt_json_contains3 """ + SELECT JSON_CONTAINS('{"age": 25, "name": "Alice", "hobbies": ["painting", "music"]}', '{"age": "25"}'); + """ + qt_json_contains4 """ + SELECT JSON_CONTAINS('{"age": 25, "name": "Alice", "hobbies": ["painting", "music"]}', '"music"', '\$.hobbies[1]'); + """ + qt_json_contains5 """ + SELECT JSON_CONTAINS('{"age": 25, "name": "Alice", "hobbies": ["painting", "music"]}', '"music"', '\$.hobbies[0]'); + """ + qt_json_contains6 """ + SELECT JSON_CONTAINS(NULL, '"music"', '{"age": 25}'); + """ }