From b6fd419f271b2b9a7d38712b124e41d30f20c804 Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 25 Sep 2024 18:01:19 +0800 Subject: [PATCH 1/3] fix new_json_reader core --- .../vec/exec/format/json/new_json_reader.cpp | 4 ++ .../test_json_reader_without_object.groovy | 57 +++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 regression-test/suites/jsonb_p0/test_json_reader_without_object.groovy diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp index 2aff2cb4e7efdb..3c0ad32cb03f99 100644 --- a/be/src/vec/exec/format/json/new_json_reader.cpp +++ b/be/src/vec/exec/format/json/new_json_reader.cpp @@ -330,6 +330,10 @@ Status NewJsonReader::get_parsed_schema(std::vector* col_names, objectValue = _json_doc; } + if (!objectValue->IsObject()) { + return Status::DataQualityError("JSON data is not an object."); + } + // use jsonpaths to col_names if (!_parsed_jsonpaths.empty()) { for (auto& _parsed_jsonpath : _parsed_jsonpaths) { diff --git a/regression-test/suites/jsonb_p0/test_json_reader_without_object.groovy b/regression-test/suites/jsonb_p0/test_json_reader_without_object.groovy new file mode 100644 index 00000000000000..82304dce317653 --- /dev/null +++ b/regression-test/suites/jsonb_p0/test_json_reader_without_object.groovy @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite("test_json_reader_without_object", "p0") { + String ak = getS3AK() + String sk = getS3SK() + String s3_endpoint = getS3Endpoint() + String bucket = context.config.otherConfigs.get("s3BucketName"); + + sql """ set enable_nereids_timeout=false; """ + sql """ set max_scan_key_num = 48 """ + sql """ set max_pushdown_conditions_per_column=1024 """ + + def dataFilePath = "https://"+"${bucket}"+"."+"${s3_endpoint}"+"/regression/datalake" + def dataSimpleNumber = "json_reader_without_object.json" + def dataSimpleArray = "json_reader_without_object_array.json" + + // select expect error + test { + sql """ select * from s3( + "uri" = "${dataFilePath}/${dataSimpleNumber}", + "s3.access_key"= "${ak}", + "s3.secret_key" = "${sk}", + "format" = "json", + "provider" = "${getS3Provider()}", + "read_json_by_line"="true"); """ + exception "DATA_QUALITY_ERROR" + } + + test { + sql """ select * from s3( + "uri" = "${dataFilePath}/${dataSimpleArray}", + "s3.access_key"= "${ak}", + "s3.secret_key" = "${sk}", + "format" = "json", + "provider" = "${getS3Provider()}", + "strip_outer_array" = "true", + "read_json_by_line"="true"); """ + exception "DATA_QUALITY_ERROR" + } +} \ No newline at end of file From 01a601be04b00903bb6a860536b6364344e89186 Mon Sep 17 00:00:00 2001 From: amory Date: Fri, 1 Nov 2024 16:06:14 +0800 Subject: [PATCH 2/3] Update test_json_reader_without_object.groovy --- .../suites/jsonb_p0/test_json_reader_without_object.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/regression-test/suites/jsonb_p0/test_json_reader_without_object.groovy b/regression-test/suites/jsonb_p0/test_json_reader_without_object.groovy index 82304dce317653..ea1ccb3ba55882 100644 --- a/regression-test/suites/jsonb_p0/test_json_reader_without_object.groovy +++ b/regression-test/suites/jsonb_p0/test_json_reader_without_object.groovy @@ -27,7 +27,7 @@ suite("test_json_reader_without_object", "p0") { sql """ set max_scan_key_num = 48 """ sql """ set max_pushdown_conditions_per_column=1024 """ - def dataFilePath = "https://"+"${bucket}"+"."+"${s3_endpoint}"+"/regression/datalake" + def dataFilePath = "https://"+"${bucket}"+"."+"${s3_endpoint}"+"/regression/jsondata" def dataSimpleNumber = "json_reader_without_object.json" def dataSimpleArray = "json_reader_without_object_array.json" @@ -54,4 +54,4 @@ suite("test_json_reader_without_object", "p0") { "read_json_by_line"="true"); """ exception "DATA_QUALITY_ERROR" } -} \ No newline at end of file +} From 9a95c48857dcc2b12443b116d7d047b6dbb6187c Mon Sep 17 00:00:00 2001 From: amory Date: Sun, 3 Nov 2024 19:00:21 +0800 Subject: [PATCH 3/3] add error msg for data type --- be/src/vec/exec/format/json/new_json_reader.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp index 9281da3c0928cd..7dfc3c528cd88e 100644 --- a/be/src/vec/exec/format/json/new_json_reader.cpp +++ b/be/src/vec/exec/format/json/new_json_reader.cpp @@ -334,7 +334,8 @@ Status NewJsonReader::get_parsed_schema(std::vector* col_names, } if (!objectValue->IsObject()) { - return Status::DataQualityError("JSON data is not an object."); + return Status::DataQualityError("JSON data is not an object. but: {}", + objectValue->GetType()); } // use jsonpaths to col_names