From d135df9fdb3be433306d099617cba62a30f6a7d4 Mon Sep 17 00:00:00 2001
From: TieweiFang
Date: Mon, 22 Jul 2024 15:59:09 +0800
Subject: [PATCH 1/3] fix

---
 .../apache/doris/analysis/OutFileClause.java  |  8 ++++
 .../outfile/test_outfile_exception.groovy     | 38 +++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
index a658bec93af519..819186a8bcc64a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
@@ -30,6 +30,7 @@
 import org.apache.doris.common.FeNameFormat;
 import org.apache.doris.common.Pair;
 import org.apache.doris.common.UserException;
+import org.apache.doris.common.util.BrokerUtil;
 import org.apache.doris.common.util.FileFormatConstants;
 import org.apache.doris.common.util.ParseUtil;
 import org.apache.doris.common.util.PrintableMap;
@@ -37,6 +38,7 @@
 import org.apache.doris.datasource.property.PropertyConverter;
 import org.apache.doris.datasource.property.constants.S3Properties;
 import org.apache.doris.qe.ConnectContext;
+import org.apache.doris.thrift.TBrokerFileStatus;
 import org.apache.doris.thrift.TFileCompressType;
 import org.apache.doris.thrift.TFileFormatType;
 import org.apache.doris.thrift.TParquetCompressionType;
@@ -670,6 +672,12 @@ private void analyzeBrokerDesc(Set<String> processedPropKeys) throws UserExcepti
             }
         }
         brokerDesc = new BrokerDesc(brokerName, storageType, brokerProps);
+        try {
+            List<TBrokerFileStatus> fileStatuses = Lists.newArrayList();
+            BrokerUtil.parseFile(filePath, brokerDesc, fileStatuses);
+        } catch (UserException e) {
+            throw new AnalysisException("parse file failed, err: " + e.getMessage(), e);
+        }
     }
 
     public static String getFsName(String path) {
diff --git a/regression-test/suites/nereids_p0/outfile/test_outfile_exception.groovy b/regression-test/suites/nereids_p0/outfile/test_outfile_exception.groovy
index 4230ace566f1ba..c4f78030a46f60 100644
--- a/regression-test/suites/nereids_p0/outfile/test_outfile_exception.groovy
+++ b/regression-test/suites/nereids_p0/outfile/test_outfile_exception.groovy
@@ -163,4 +163,42 @@ suite("test_outfile_exception") {
         // check exception
         exception "NoSuchBucket"
     }
+
+    // check hdfs url with fs.defaultFS
+    test {
+        String hdfs_port = context.config.otherConfigs.get("hive2HdfsPort")
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
+        sql """
+            select * from ${tableName} t ORDER BY user_id
+            into outfile "${defaultFS}//tmp/ftw/export/exp_"
+            format as csv_with_names_and_types
+            properties(
+                "fs.defaultFS"="${defaultFS}",
+                "hadoop.username" = "hadoop"
+            );
+        """
+
+        // check exception
+        exception "errors while get file status Wrong FS: hdfs://tmp/ftw/export/exp_"
+    }
+
+    // check hdfs url with fs.defaultFS + /
+    test {
+        String hdfs_port = context.config.otherConfigs.get("hive2HdfsPort")
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
+        sql """
+            select * from ${tableName} t ORDER BY user_id
+            into outfile "${defaultFS}//tmp/ftw/export/exp_"
+            format as csv_with_names_and_types
+            properties(
+                "fs.defaultFS"="${defaultFS}/",
+                "hadoop.username" = "hadoop"
+            );
+        """
+
+        // check exception
+        exception "errors while get file status Wrong FS: hdfs://tmp/ftw/export/exp_"
+    }
 }
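Note on the check added above: Hadoop's client raises the "Wrong FS" error that the new regression tests expect whenever a path's authority does not match fs.defaultFS, for example when a mangled "hdfs://tmp/..." path makes "tmp" look like the namenode address. A minimal Java sketch of that failure mode (host, port, and class name are made up for illustration, not taken from the patch):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WrongFsDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The filesystem this client considers its own.
        conf.set("fs.defaultFS", "hdfs://127.0.0.1:8020");
        FileSystem fs = FileSystem.get(conf);
        // In "hdfs://tmp/ftw/export/exp_" the URI authority is "tmp", not a
        // namenode address, so the path check fails with
        // IllegalArgumentException: Wrong FS: ..., expected: hdfs://127.0.0.1:8020
        fs.getFileStatus(new Path("hdfs://tmp/ftw/export/exp_"));
    }
}

Calling BrokerUtil.parseFile during analysis surfaces this error at planning time instead of after the query has already started executing.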
From fe1fec1c9c914a4afdb1617ad36f0d8a3c72706d Mon Sep 17 00:00:00 2001
From: TieweiFang
Date: Tue, 23 Jul 2024 17:24:46 +0800
Subject: [PATCH 2/3] fix 2

---
 .../apache/doris/analysis/OutFileClause.java  |  16 +--
 .../outfile/hdfs/test_outfile_hdfs.out}       |  64 ++++++++++
 .../outfile/hdfs/test_outfile_hdfs.groovy     |  97 ++++++++++++++
 .../export_p2/test_export_with_hdfs.groovy    | 118 ------------------
 4 files changed, 169 insertions(+), 126 deletions(-)
 rename regression-test/data/{export_p2/test_export_with_hdfs.out => export_p0/outfile/hdfs/test_outfile_hdfs.out} (56%)
 create mode 100644 regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy
 delete mode 100644 regression-test/suites/export_p2/test_export_with_hdfs.groovy

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
index 819186a8bcc64a..8dd91c3fd8a395 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
@@ -30,7 +30,6 @@
 import org.apache.doris.common.FeNameFormat;
 import org.apache.doris.common.Pair;
 import org.apache.doris.common.UserException;
-import org.apache.doris.common.util.BrokerUtil;
 import org.apache.doris.common.util.FileFormatConstants;
 import org.apache.doris.common.util.ParseUtil;
 import org.apache.doris.common.util.PrintableMap;
@@ -38,7 +37,6 @@
 import org.apache.doris.datasource.property.PropertyConverter;
 import org.apache.doris.datasource.property.constants.S3Properties;
 import org.apache.doris.qe.ConnectContext;
-import org.apache.doris.thrift.TBrokerFileStatus;
 import org.apache.doris.thrift.TFileCompressType;
 import org.apache.doris.thrift.TFileFormatType;
 import org.apache.doris.thrift.TParquetCompressionType;
@@ -57,6 +55,8 @@
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
@@ -537,6 +537,12 @@ private void analyzeFilePath() throws AnalysisException {
                 filePath = filePath.replace(HDFS_FILE_PREFIX, HDFS_FILE_PREFIX + dfsNameServices);
             }
         }
+        // delete repeated '/'
+        try {
+            filePath = new URI(filePath).normalize().toString();
+        } catch (URISyntaxException e) {
+            throw new AnalysisException("Can not normalize the URI, error: " + e.getMessage());
+        }
         if (Strings.isNullOrEmpty(filePath)) {
             throw new AnalysisException("Must specify file in OUTFILE clause");
         }
@@ -672,12 +678,6 @@ private void analyzeBrokerDesc(Set<String> processedPropKeys) throws UserExcepti
             }
         }
         brokerDesc = new BrokerDesc(brokerName, storageType, brokerProps);
-        try {
-            List<TBrokerFileStatus> fileStatuses = Lists.newArrayList();
-            BrokerUtil.parseFile(filePath, brokerDesc, fileStatuses);
-        } catch (UserException e) {
-            throw new AnalysisException("parse file failed, err: " + e.getMessage(), e);
-        }
     }
 
     public static String getFsName(String path) {
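For reference, the java.net.URI.normalize() call in the hunk above rewrites only the path component: "." and ".." segments are resolved per RFC 2396, the JDK implementation also collapses runs of '/', and the scheme and authority are left untouched. A standalone sketch (the example URI and class name are illustrative, though the path mirrors the shapes exercised by the regression tests below):

import java.net.URI;

public class NormalizeDemo {
    public static void main(String[] args) throws Exception {
        URI uri = new URI("hdfs://127.0.0.1:8020////tmp//ftw/export//exp_");
        // Duplicated '/' in the path is collapsed; authority stays intact.
        // Expected: hdfs://127.0.0.1:8020/tmp/ftw/export/exp_
        System.out.println(uri.normalize());
    }
}

Normalizing locally also removes the need for the eager BrokerUtil.parseFile probe from the first patch, which is presumably why that block is deleted here: the path is repaired at analysis time without a metadata round-trip to the target filesystem.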
diff --git a/regression-test/data/export_p2/test_export_with_hdfs.out b/regression-test/data/export_p0/outfile/hdfs/test_outfile_hdfs.out
similarity index 56%
rename from regression-test/data/export_p2/test_export_with_hdfs.out
rename to regression-test/data/export_p0/outfile/hdfs/test_outfile_hdfs.out
index 457bcfced30549..b0159c52964e61 100644
--- a/regression-test/data/export_p2/test_export_with_hdfs.out
+++ b/regression-test/data/export_p0/outfile/hdfs/test_outfile_hdfs.out
@@ -47,6 +47,70 @@
 8	ftw-8	26
 9	ftw-9	27
 
+-- !select --
+
+-- !select --
+
+-- !select --
+1	ftw-1	19
+10	\N	\N
+2	ftw-2	20
+3	ftw-3	21
+4	ftw-4	22
+5	ftw-5	23
+6	ftw-6	24
+7	ftw-7	25
+8	ftw-8	26
+9	ftw-9	27
+
+-- !select --
+1	ftw-1	19
+10	\N	\N
+2	ftw-2	20
+3	ftw-3	21
+4	ftw-4	22
+5	ftw-5	23
+6	ftw-6	24
+7	ftw-7	25
+8	ftw-8	26
+9	ftw-9	27
+
+-- !select --
+1	ftw-1	19
+10	\N	\N
+2	ftw-2	20
+3	ftw-3	21
+4	ftw-4	22
+5	ftw-5	23
+6	ftw-6	24
+7	ftw-7	25
+8	ftw-8	26
+9	ftw-9	27
+
+-- !select --
+1	ftw-1	19
+10	\N	\N
+2	ftw-2	20
+3	ftw-3	21
+4	ftw-4	22
+5	ftw-5	23
+6	ftw-6	24
+7	ftw-7	25
+8	ftw-8	26
+9	ftw-9	27
+
+-- !select --
+1	ftw-1	19
+10	\N	\N
+2	ftw-2	20
+3	ftw-3	21
+4	ftw-4	22
+5	ftw-5	23
+6	ftw-6	24
+7	ftw-7	25
+8	ftw-8	26
+9	ftw-9	27
+
 -- !select --
 1	ftw-1	19
 10	\N	\N
diff --git a/regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy b/regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy
new file mode 100644
index 00000000000000..004016d7730db6
--- /dev/null
+++ b/regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_outfile_with_hdfs", "p0") {
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        def table_export_name = "test_outfile_with_hdfs"
+        // create table and insert
+        sql """ DROP TABLE IF EXISTS ${table_export_name} """
+        sql """
+            CREATE TABLE IF NOT EXISTS ${table_export_name} (
+                `id` int(11) NULL,
+                `name` string NULL,
+                `age` int(11) NULL
+            )
+            PARTITION BY RANGE(id)
+            (
+                PARTITION less_than_20 VALUES LESS THAN ("20"),
+                PARTITION between_20_70 VALUES [("20"),("70")),
+                PARTITION more_than_70 VALUES LESS THAN ("151")
+            )
+            DISTRIBUTED BY HASH(id) BUCKETS 3
+            PROPERTIES("replication_num" = "1");
+        """
+        StringBuilder sb = new StringBuilder()
+        int i = 1
+        for (; i < 10; i ++) {
+            sb.append("""
+                (${i}, 'ftw-${i}', ${i + 18}),
+            """)
+        }
+        sb.append("""
+            (${i}, NULL, NULL)
+        """)
+        sql """ INSERT INTO ${table_export_name} VALUES
+                ${sb.toString()}
+            """
+        qt_select_export """ SELECT * FROM ${table_export_name} t ORDER BY id; """
+
+        String hdfs_port = context.config.otherConfigs.get("hive2HdfsPort")
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        // It's okay to use a random `hdfsUser`, but it cannot be empty.
+        def hdfsUserName = "doris"
+        def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
+
+
+        // test outfile
+        def test_outfile = {format, uri ->
+            def res = sql """
+                SELECT * FROM ${table_export_name} t ORDER BY id
+                INTO OUTFILE "${defaultFS}${uri}"
+                FORMAT AS ${format}
+                PROPERTIES (
+                    "fs.defaultFS"="${defaultFS}",
+                    "hadoop.username" = "${hdfsUserName}"
+                );
+            """
+
+            def outfile_url = res[0][3]
+            // check data correctness
+            order_qt_select """ select * from hdfs(
+                "uri" = "${outfile_url}.${format}",
+                "hadoop.username" = "${hdfsUserName}",
+                "format" = "${format}");
+            """
+        }
+
+        test_outfile('csv', '/tmp/ftw/export/exp_');
+        test_outfile('parquet', '/tmp/ftw/export/exp_');
+        test_outfile('orc', '/tmp/ftw/export/exp_');
+        test_outfile('csv_with_names', '/tmp/ftw/export/exp_');
+        test_outfile('csv_with_names_and_types', '/tmp/ftw/export/exp_');
+
+        // test uri with multi '/'
+        test_outfile('parquet', '//tmp/ftw/export/exp_');
+        test_outfile('parquet', '//tmp//ftw/export/exp_');
+        test_outfile('parquet', '//tmp/ftw/export//exp_');
+        test_outfile('parquet', '//tmp/ftw//export//exp_');
+        test_outfile('parquet', '//tmp/ftw//export/exp_');
+        test_outfile('parquet', '///tmp/ftw/export/exp_');
+        test_outfile('parquet', '////tmp/ftw/export/exp_');
+    }
+}
diff --git a/regression-test/suites/export_p2/test_export_with_hdfs.groovy b/regression-test/suites/export_p2/test_export_with_hdfs.groovy
deleted file mode 100644
index e523fdf5a47237..00000000000000
--- a/regression-test/suites/export_p2/test_export_with_hdfs.groovy
+++ /dev/null
@@ -1,118 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-suite("test_export_with_hdfs", "p2") {
-    // open nereids
-    sql """ set enable_nereids_planner=true """
-    sql """ set enable_fallback_to_original_planner=false """
-
-
-    String nameNodeHost = context.config.otherConfigs.get("extHiveHmsHost")
-    String hdfsPort = context.config.otherConfigs.get("extHdfsPort")
-    String fs = "hdfs://${nameNodeHost}:${hdfsPort}"
-    String user_name = context.config.otherConfigs.get("extHiveHmsUser")
-
-
-    def table_export_name = "test_export_with_hdfs"
-    // create table and insert
-    sql """ DROP TABLE IF EXISTS ${table_export_name} """
-    sql """
-        CREATE TABLE IF NOT EXISTS ${table_export_name} (
-            `id` int(11) NULL,
-            `name` string NULL,
-            `age` int(11) NULL
-        )
-        PARTITION BY RANGE(id)
-        (
-            PARTITION less_than_20 VALUES LESS THAN ("20"),
-            PARTITION between_20_70 VALUES [("20"),("70")),
-            PARTITION more_than_70 VALUES LESS THAN ("151")
-        )
-        DISTRIBUTED BY HASH(id) BUCKETS 3
-        PROPERTIES("replication_num" = "1");
-    """
-    StringBuilder sb = new StringBuilder()
-    int i = 1
-    for (; i < 10; i ++) {
-        sb.append("""
-            (${i}, 'ftw-${i}', ${i + 18}),
-        """)
-    }
-    sb.append("""
-        (${i}, NULL, NULL)
-    """)
-    sql """ INSERT INTO ${table_export_name} VALUES
-            ${sb.toString()}
-        """
-    qt_select_export """ SELECT * FROM ${table_export_name} t ORDER BY id; """
-
-
-    def waiting_export = { export_label ->
-        while (true) {
-            def res = sql """ show export where label = "${export_label}" """
-            logger.info("export state: " + res[0][2])
-            if (res[0][2] == "FINISHED") {
-                def json = parseJson(res[0][11])
-                assert json instanceof List
-                assertEquals("1", json.fileNumber[0][0])
-                log.info("outfile_path: ${json.url[0][0]}")
-                return json.url[0][0];
-            } else if (res[0][2] == "CANCELLED") {
-                throw new IllegalStateException("""export failed: ${res[0][10]}""")
-            } else {
-                sleep(5000)
-            }
-        }
-    }
-
-    def outFilePath = """/user/export_test/export/exp_"""
-
-    // 1. csv test
-    def test_export = {format, file_suffix, isDelete ->
-        def uuid = UUID.randomUUID().toString()
-        // exec export
-        sql """
-            EXPORT TABLE ${table_export_name} TO "${fs}${outFilePath}"
-            PROPERTIES(
-                "label" = "${uuid}",
-                "format" = "${format}",
-                "column_separator"=",",
-                "delete_existing_files"="${isDelete}"
-            )
-            with HDFS (
-                "fs.defaultFS"="${fs}",
-                "hadoop.username" = "${user_name}"
-            );
-        """
-
-        def outfile_url = waiting_export.call(uuid)
-
-        // check data correctness
-        order_qt_select """ select * from hdfs(
-            "uri" = "${outfile_url}0.${file_suffix}",
-            "hadoop.username" = "${user_name}",
-            "column_separator" = ",",
-            "format" = "${format}");
-        """
-    }
-
-    test_export('csv', 'csv', true);
-    test_export('parquet', 'parquet', true);
-    test_export('orc', 'orc', true);
-    test_export('csv_with_names', 'csv', true);
-    test_export('csv_with_names_and_types', 'csv', true);
-}
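One caveat to the normalization introduced in this patch: the java.net.URI constructor rejects strings that are not valid URIs by throwing URISyntaxException, which is exactly what the surrounding try/catch converts into an AnalysisException. A minimal sketch of that error path (the path, with its unencoded space, and the class name are made-up examples):

import java.net.URI;
import java.net.URISyntaxException;

public class NormalizeErrorDemo {
    public static void main(String[] args) {
        try {
            // The unencoded space is an illegal URI character, so the
            // constructor throws before normalize() is ever reached.
            new URI("hdfs://127.0.0.1:8020/tmp/bad path/exp_").normalize();
        } catch (URISyntaxException e) {
            // Mirrors the message wrapped by the patch:
            System.out.println("Can not normalize the URI, error: " + e.getMessage());
        }
    }
}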
From f822b1bff202d7b0a6bc4c07d9934015bfc2ca35 Mon Sep 17 00:00:00 2001
From: TieweiFang
Date: Thu, 25 Jul 2024 11:05:13 +0800
Subject: [PATCH 3/3] fix 3

---
 .../outfile/hdfs/test_outfile_hdfs.groovy     |  2 +-
 .../outfile/test_outfile_exception.groovy     | 38 ------------------
 2 files changed, 1 insertion(+), 39 deletions(-)

diff --git a/regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy b/regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy
index 004016d7730db6..fccf5800e6aa41 100644
--- a/regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy
+++ b/regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-suite("test_outfile_with_hdfs", "p0") {
+suite("test_outfile_with_hdfs", "external,hive,external_docker") {
     String enabled = context.config.otherConfigs.get("enableHiveTest")
     if (enabled != null && enabled.equalsIgnoreCase("true")) {
         def table_export_name = "test_outfile_with_hdfs"
diff --git a/regression-test/suites/nereids_p0/outfile/test_outfile_exception.groovy b/regression-test/suites/nereids_p0/outfile/test_outfile_exception.groovy
index c4f78030a46f60..4230ace566f1ba 100644
--- a/regression-test/suites/nereids_p0/outfile/test_outfile_exception.groovy
+++ b/regression-test/suites/nereids_p0/outfile/test_outfile_exception.groovy
@@ -163,42 +163,4 @@ suite("test_outfile_exception") {
         // check exception
         exception "NoSuchBucket"
     }
-
-    // check hdfs url with fs.defaultFS
-    test {
-        String hdfs_port = context.config.otherConfigs.get("hive2HdfsPort")
-        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
-        def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
-        sql """
-            select * from ${tableName} t ORDER BY user_id
-            into outfile "${defaultFS}//tmp/ftw/export/exp_"
-            format as csv_with_names_and_types
-            properties(
-                "fs.defaultFS"="${defaultFS}",
-                "hadoop.username" = "hadoop"
-            );
-        """
-
-        // check exception
-        exception "errors while get file status Wrong FS: hdfs://tmp/ftw/export/exp_"
-    }
-
-    // check hdfs url with fs.defaultFS + /
-    test {
-        String hdfs_port = context.config.otherConfigs.get("hive2HdfsPort")
-        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
-        def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
-        sql """
-            select * from ${tableName} t ORDER BY user_id
-            into outfile "${defaultFS}//tmp/ftw/export/exp_"
-            format as csv_with_names_and_types
-            properties(
-                "fs.defaultFS"="${defaultFS}/",
-                "hadoop.username" = "hadoop"
-            );
-        """
-
-        // check exception
-        exception "errors while get file status Wrong FS: hdfs://tmp/ftw/export/exp_"
-    }
 }