From 80b3bd43a4637ef3335bb19f92bd6434fff5df6e Mon Sep 17 00:00:00 2001 From: Socrates Date: Thu, 22 Aug 2024 22:45:08 +0800 Subject: [PATCH] [fix](hive) report error with escape char and null format (#39700) ## Proposed changes Because BE does not process the escape char and null format when reading Hive text tables, FE now reports an error when either of these serde properties is set to a non-default value. --- .../serde_prop/some_serde_table.hql | 27 +++++++++++++++++++ .../datasource/hive/source/HiveScanNode.java | 20 ++++++++++++++ .../hive/test_hive_serde_prop.groovy | 18 +++++++++++++ 3 files changed, 65 insertions(+) diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql index 13e7cb86e0390f..b5d963a1c2b84c 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql @@ -82,6 +82,31 @@ STORED AS INPUTFORMAT OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; +CREATE TABLE `serde_test7`( + `id` int, + `name` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 'escape.delim' = '|' +) +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; + +CREATE TABLE `serde_test8`( + `id` int, + `name` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 'serialization.null.format' = 'null' +) +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; insert into serde_test1 values(1, "abc"),(2, "def"); insert into serde_test2 values(1, "abc"),(2, "def"); @@ -89,3 +114,5 @@ insert into 
serde_test3 values(1, "abc"),(2, "def"); insert into serde_test4 values(1, "abc"),(2, "def"); insert into serde_test5 values(1, "abc"),(2, "def"); insert into serde_test6 values(1, "abc"),(2, "def"); +insert into serde_test7 values(1, "abc"),(2, "def"); +insert into serde_test8 values(1, "abc"),(2, "def"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java index abb8cc8dda3c13..211dd7dbb4e6f1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java @@ -95,6 +95,11 @@ public class HiveScanNode extends FileQueryScanNode { public static final String PROP_MAP_KV_DELIMITER = "mapkey.delim"; public static final String DEFAULT_MAP_KV_DELIMITER = "\003"; + public static final String PROP_ESCAPE_DELIMITER = "escape.delim"; + public static final String DEFAULT_ESCAPE_DELIMIER = "\\"; + public static final String PROP_NULL_FORMAT = "serialization.null.format"; + public static final String DEFAULT_NULL_FORMAT = "\\N"; + protected final HMSExternalTable hmsTable; private HiveTransaction hiveTransaction = null; @@ -480,6 +485,21 @@ protected TFileAttributes getFileAttributes() throws UserException { textParams.setEnclose(serdeParams.get(PROP_QUOTE_CHAR).getBytes()[0]); } + // TODO: support escape char and null format in csv_reader + Optional escapeChar = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), + PROP_ESCAPE_DELIMITER); + if (escapeChar.isPresent() && !escapeChar.get().equals(DEFAULT_ESCAPE_DELIMIER)) { + throw new UserException( + "not support serde prop " + PROP_ESCAPE_DELIMITER + " in hive text reading"); + } + + Optional nullFormat = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), + PROP_NULL_FORMAT); + if (nullFormat.isPresent() && 
!nullFormat.get().equals(DEFAULT_NULL_FORMAT)) { + throw new UserException( + "not support serde prop " + PROP_NULL_FORMAT + " in hive text reading"); + } + TFileAttributes fileAttributes = new TFileAttributes(); fileAttributes.setTextParams(textParams); fileAttributes.setHeaderType(""); diff --git a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy index 0da2eb3160ac83..8aa97e63123a15 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy @@ -45,6 +45,24 @@ suite("test_hive_serde_prop", "external_docker,hive,external_docker_hive,p0,exte qt_5 """select * from ${catalog_name}.regression.serde_test4 order by id;""" qt_6 """select * from ${catalog_name}.regression.serde_test5 order by id;""" qt_7 """select * from ${catalog_name}.regression.serde_test6 order by id;""" + + def success = true; + try { + sql """select * from ${catalog_name}.regression.serde_test7 order by id;""" + } catch(Exception e) { + assertTrue(e.getMessage().contains("not support serde prop"), e.getMessage()) + success = false; + } + assertEquals(success, false) + + success = true; + try { + sql """select * from ${catalog_name}.regression.serde_test8 order by id;""" + } catch(Exception e) { + assertTrue(e.getMessage().contains("not support serde prop"), e.getMessage()) + success = false; + } + assertEquals(success, false) } }