diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java index 1e89fd41a5d399..eabe82804c3107 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java @@ -37,7 +37,6 @@ import org.apache.doris.common.util.BrokerUtil; import org.apache.doris.common.util.FileFormatConstants; import org.apache.doris.common.util.FileFormatUtils; -import org.apache.doris.common.util.NetUtils; import org.apache.doris.common.util.Util; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.planner.ScanNode; @@ -316,23 +315,27 @@ public List getTableColumns() throws AnalysisException { TNetworkAddress address = new TNetworkAddress(be.getHost(), be.getBrpcPort()); try { PFetchTableSchemaRequest request = getFetchTableStructureRequest(); - Future future = BackendServiceProxy.getInstance() - .fetchTableStructureAsync(address, request); - - InternalService.PFetchTableSchemaResult result = future.get(); - TStatusCode code = TStatusCode.findByValue(result.getStatus().getStatusCode()); - String errMsg; - if (code != TStatusCode.OK) { - if (!result.getStatus().getErrorMsgsList().isEmpty()) { - errMsg = result.getStatus().getErrorMsgsList().get(0); - } else { - errMsg = "fetchTableStructureAsync failed. backend address: " - + NetUtils - .getHostPortInAccessibleFormat(address.getHostname(), address.getPort()); + InternalService.PFetchTableSchemaResult result = null; + + // `request == null` means we don't need to get schemas from BE, + // and we fill a dummy col for this table. + if (request != null) { + Future future = BackendServiceProxy.getInstance() + .fetchTableStructureAsync(address, request); + + result = future.get(); + TStatusCode code = TStatusCode.findByValue(result.getStatus().getStatusCode()); + String errMsg; + if (code != TStatusCode.OK) { + if (!result.getStatus().getErrorMsgsList().isEmpty()) { + errMsg = result.getStatus().getErrorMsgsList().get(0); + } else { + errMsg = "fetchTableStructureAsync failed. backend address: " + + address.getHostname() + ":" + address.getPort(); + } + throw new AnalysisException(errMsg); } - throw new AnalysisException(errMsg); } - fillColumns(result); } catch (RpcException e) { throw new AnalysisException("fetchTableStructureResult rpc exception", e); @@ -393,10 +396,12 @@ private Pair getColumnType(List typeNodes, int start) return Pair.of(type, parsedNodes); } - private void fillColumns(InternalService.PFetchTableSchemaResult result) - throws AnalysisException { - if (result.getColumnNums() == 0) { - throw new AnalysisException("The amount of column is 0"); + private void fillColumns(InternalService.PFetchTableSchemaResult result) { + // `result == null` means we don't need to get schemas from BE, + // and we fill a dummy col for this table. + if (result == null) { + columns.add(new Column("__dummy_col", ScalarType.createStringType(), true)); + return; } // add fetched file columns for (int idx = 0; idx < result.getColumnNums(); ++idx) { @@ -412,7 +417,7 @@ private void fillColumns(InternalService.PFetchTableSchemaResult result) } } - private PFetchTableSchemaRequest getFetchTableStructureRequest() throws AnalysisException, TException { + private PFetchTableSchemaRequest getFetchTableStructureRequest() throws TException { // set TFileScanRangeParams TFileScanRangeParams fileScanRangeParams = new TFileScanRangeParams(); fileScanRangeParams.setFormatType(fileFormatType); @@ -429,14 +434,19 @@ private PFetchTableSchemaRequest getFetchTableStructureRequest() throws Analysis // get first file, used to parse table schema TBrokerFileStatus firstFile = null; for (TBrokerFileStatus fileStatus : fileStatuses) { - if (fileStatus.isIsDir()) { + if (fileStatus.isIsDir() || fileStatus.size == 0) { continue; } firstFile = fileStatus; break; } + + // `firstFile == null` means: + // 1. No matching file path exists + // 2. All matched files have a size of 0 + // For these two situations, we don't need to get schema from BE if (firstFile == null) { - throw new AnalysisException("Can not get first file, please check uri."); + return null; } // set TFileRangeDesc diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf_error_uri.out b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf_error_uri.out new file mode 100644 index 00000000000000..115f42f2a0e23f --- /dev/null +++ b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf_error_uri.out @@ -0,0 +1,6 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select1 -- + +-- !desc1 -- +__dummy_col TEXT Yes false \N NONE + diff --git a/regression-test/data/load_p0/tvf/test_tvf_empty_file.out b/regression-test/data/load_p0/tvf/test_tvf_empty_file.out new file mode 100644 index 00000000000000..59822770e2cd0c --- /dev/null +++ b/regression-test/data/load_p0/tvf/test_tvf_empty_file.out @@ -0,0 +1,17 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- + +-- !desc -- +__dummy_col TEXT Yes false \N NONE + +-- !select2 -- +1 doris 18 +2 nereids 20 +3 xxx 22 +4 yyy 21 + +-- !des2 -- +c1 TEXT Yes false \N NONE +c2 TEXT Yes false \N NONE +c3 TEXT Yes false \N NONE + diff --git a/regression-test/data/load_p0/tvf/test_tvf_error_url.out b/regression-test/data/load_p0/tvf/test_tvf_error_url.out new file mode 100644 index 00000000000000..468a50ff85d073 --- /dev/null +++ b/regression-test/data/load_p0/tvf/test_tvf_error_url.out @@ -0,0 +1,11 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- + +-- !desc -- +__dummy_col TEXT Yes false \N NONE + +-- !select2 -- + +-- !desc2 -- +__dummy_col TEXT Yes false \N NONE + diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf_error_uri.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf_error_uri.groovy new file mode 100644 index 00000000000000..3f663c25e730a1 --- /dev/null +++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf_error_uri.groovy @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hdfs_tvf_error_uri","external,hive,tvf,external_docker") { + String hdfs_port = context.config.otherConfigs.get("hdfs_port") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + // It's okay to use random `hdfsUser`, but can not be empty. + def hdfsUserName = "doris" + def format = "csv" + def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}" + def uri = "" + + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + // test csv format + uri = "${defaultFS}" + "/user/doris/preinstalled_data/csv_format_test/no_exist_file.csv" + format = "csv" + order_qt_select1 """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}"); """ + + order_qt_desc1 """ desc function HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}"); """ + } +} \ No newline at end of file diff --git a/regression-test/suites/load_p0/tvf/test_tvf_empty_file.groovy b/regression-test/suites/load_p0/tvf/test_tvf_empty_file.groovy new file mode 100644 index 00000000000000..9877716ae8c679 --- /dev/null +++ b/regression-test/suites/load_p0/tvf/test_tvf_empty_file.groovy @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_tvf_empty_file", "p0") { + String ak = getS3AK() + String sk = getS3SK() + String s3_endpoint = getS3Endpoint() + String region = getS3Region() + String bucket = context.config.otherConfigs.get("s3BucketName"); + + String path = "regression/datalake" + + // ${path}/empty_file_test.csv is an empty file + // so it should return empty sets. + order_qt_select """ SELECT * FROM S3 ( + "uri" = "http://${bucket}.${s3_endpoint}/${path}/empty_file_test.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ + + order_qt_desc """ desc function S3 ( + "uri" = "http://${bucket}.${s3_endpoint}/${path}/empty_file_test.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ + + // ${path}/empty_file_test*.csv matches 3 files: + // empty_file_test.csv, empty_file_test_1.csv, empty_file_test_2.csv + // empty_file_test.csv is an empty file, but + // empty_file_test_1.csv and empty_file_test_2.csv have data + // so it should return data of empty_file_test_1.csv and empty_file_test_2.cs + order_qt_select2 """ SELECT * FROM S3 ( + "uri" = "http://${bucket}.${s3_endpoint}/${path}/empty_file_test*.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ) order by c1; + """ + + order_qt_des2 """ desc function S3 ( + "uri" = "http://${bucket}.${s3_endpoint}/${path}/empty_file_test*.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ +} diff --git a/regression-test/suites/load_p0/tvf/test_tvf_error_url.groovy b/regression-test/suites/load_p0/tvf/test_tvf_error_url.groovy new file mode 100644 index 00000000000000..d1dcff4d5300b4 --- /dev/null +++ b/regression-test/suites/load_p0/tvf/test_tvf_error_url.groovy @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_tvf_error_url", "p0") { + String ak = getS3AK() + String sk = getS3SK() + String s3_endpoint = getS3Endpoint() + String region = getS3Region() + String bucket = context.config.otherConfigs.get("s3BucketName"); + + String path = "select_tvf/no_exists_file_test" + order_qt_select """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}/${bucket}/${path}/no_exist_file1.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ + + order_qt_desc """ desc function S3 ( + "uri" = "http://${s3_endpoint}/${bucket}/${path}/no_exist_file1.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ + + order_qt_select2 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}/${bucket}/${path}/*.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ + + order_qt_desc2 """ desc function S3 ( + "uri" = "http://${s3_endpoint}/${bucket}/${path}/*.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ +}