Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion be/src/vec/exec/format/csv/csv_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ template <bool from_json>
Status CsvReader::deserialize_nullable_string(IColumn& column, Slice& slice) {
auto& null_column = assert_cast<ColumnNullable&>(column);
if (!(from_json && _options.converted_from_string && slice.trim_double_quotes())) {
if (slice.size == 2 && slice[0] == '\\' && slice[1] == 'N') {
if (slice.compare(Slice(_options.null_format, _options.null_len)) == 0) {
null_column.insert_data(nullptr, 0);
return Status::OK();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,14 @@ CREATE TABLE `all_types_text`(
`t_array_string_all_nulls` array<string>,
`dt` int)
stored as textfile
TBLPROPERTIES("line.delim"="\n", "field.delim"="\1");
TBLPROPERTIES(
'field.delim'='\t',
'line.delim'='\n',
'collection.delim'=',',
'mapkey.delim'=':',
'escape.delim'='|',
'serialization.null.format'='null'
);

CREATE TABLE all_types_par_text(
`boolean_col` boolean,
Expand Down Expand Up @@ -628,4 +635,11 @@ CREATE TABLE all_types_par_text(
PARTITIONED BY (
`dt` int)
stored as textfile
TBLPROPERTIES("line.delim"="\n", "field.delim"="\1");
TBLPROPERTIES(
'field.delim'='\t',
'line.delim'='\n',
'collection.delim'=',',
'mapkey.delim'=':',
'escape.delim'='|',
'serialization.null.format'='null'
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !default_properties --
1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"}
2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"}
3 Charlie \N {"keyC":"valueC", "keyD":"valueD"}

-- !hive_docker_default_properties --
1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2"}
2 Bob ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"}
3 Charlie \N {"keyC":"valueC","keyD":"valueD"}

-- !standard_properties --
1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"}
2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"}
3 Charlie \N {"keyC":"valueC", "keyD":"valueD"}

-- !hive_docker_standard_properties --
1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2"}
2 Bob ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"}
3 Charlie \N {"keyC":"valueC","keyD":"valueD"}

-- !different_properties --
1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"}
2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"}
3 Charlie \N {"keyC":"valueC", "keyD":"valueD"}

-- !hive_docker_different_properties --
1 Alice ["tag1,tag2"] {"key1":"value1,key2:value2\\u00042"}

-- !default_properties --
1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"}
2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"}
3 Charlie \N {"keyC":"valueC", "keyD":"valueD"}

-- !hive_docker_default_properties --
1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2"}
2 Bob ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"}
3 Charlie \N {"keyC":"valueC","keyD":"valueD"}

-- !standard_properties --
1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"}
2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"}
3 Charlie \N {"keyC":"valueC", "keyD":"valueD"}

-- !hive_docker_standard_properties --
1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2"}
2 Bob ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"}
3 Charlie \N {"keyC":"valueC","keyD":"valueD"}

-- !different_properties --
1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"}
2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"}
3 Charlie \N {"keyC":"valueC", "keyD":"valueD"}

-- !hive_docker_different_properties --
1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2\\u00042"}

Original file line number Diff line number Diff line change
Expand Up @@ -17,62 +17,137 @@

suite("test_hive_ddl_text_format", "p0,external,hive,external_docker,external_docker_hive") {
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled != null && enabled.equalsIgnoreCase("true")) {
String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
String hms_port = context.config.otherConfigs.get("hive3HmsPort")
String hdfs_port = context.config.otherConfigs.get("hive3HdfsPort")
String catalog_name = "test_hive_ddl_text_format"
String table_name = "table_with_pars";
// Bail out of the suite early unless the `enableHiveTest` configuration entry
// (read into `enabled` above) is explicitly set to "true" (case-insensitive).
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
    logger.info("disable Hive test.")
    return;
}

for (String hivePrefix : ["hive2", "hive3"]) {
setHivePrefix(hivePrefix)
try{
String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort")
String catalog_name = "test_hive_ddl_text_format"
String table_name = "table_with_pars";

sql """drop catalog if exists ${catalog_name};"""
sql """drop catalog if exists ${catalog_name};"""

sql """
create catalog if not exists ${catalog_name} properties (
'type'='hms',
'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}',
'use_meta_cache' = 'true'
sql """
create catalog if not exists ${catalog_name} properties (
'type'='hms',
'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}',
'use_meta_cache' = 'true'
);
"""
logger.info("catalog " + catalog_name + " created")
sql """switch ${catalog_name};"""
logger.info("switched to catalog " + catalog_name)
sql """use `default`;"""

sql """ drop table if exists text_table_default_properties """
sql """
create table text_table_default_properties (
id int,
`name` string,
tags array<string>,
attributes map<string, string>
) PROPERTIES (
'file_format'='text'
);
"""
logger.info("catalog " + catalog_name + " created")
sql """switch ${catalog_name};"""
logger.info("switched to catalog " + catalog_name)
sql """use `default`;"""
"""
sql """
INSERT INTO text_table_default_properties VALUES
(1, 'Alice', array('tag1', 'tag2'), map('key1', 'value1', 'key2', 'value2')),
(2, 'Bob', array('tagA', 'tagB'), map('keyA', 'valueA', 'keyB', 'valueB')),
(3, 'Charlie', NULL, map('keyC', 'valueC', 'keyD', 'valueD'));
"""
order_qt_default_properties """ select * from text_table_default_properties """

order_qt_hive_docker_default_properties""" select * from text_table_default_properties """

sql """ drop table if exists tb_text """
sql """
create table tb_text (
id int,
`name` string
) PROPERTIES (
'compression'='gzip',
'file_format'='text',
'field.delim'='\t',
'line.delim'='\n',
'collection.delim'=';',
'mapkey.delim'=':',
'serialization.null.format'='\\N'
);
"""
sql """ drop table if exists text_table_standard_properties """
// Escape characters need to be considered in groovy scripts
sql """
create table text_table_standard_properties (
id int,
`name` string,
tags array<string>,
attributes map<string, string>
) PROPERTIES (
'compression'='plain',
'file_format'='text',
'field.delim'='\\1',
'line.delim'='\\n',
'collection.delim'='\\2',
'mapkey.delim'='\\3',
'escape.delim'= '\\\\',
'serialization.null.format'='\\\\N'
);
"""
sql """
INSERT INTO text_table_standard_properties VALUES
(1, 'Alice', array('tag1', 'tag2'), map('key1', 'value1', 'key2', 'value2')),
(2, 'Bob', array('tagA', 'tagB'), map('keyA', 'valueA', 'keyB', 'valueB')),
(3, 'Charlie', NULL, map('keyC', 'valueC', 'keyD', 'valueD'));
"""
order_qt_standard_properties """ select * from text_table_standard_properties """
order_qt_hive_docker_standard_properties """ select * from text_table_standard_properties order by id; """

sql """ drop table if exists text_table_different_properties """
sql """
create table text_table_different_properties (
id int,
`name` string,
tags array<string>,
attributes map<string, string>
) PROPERTIES (
'compression'='gzip',
'file_format'='text',
'field.delim'='A',
'line.delim'='\\4',
'collection.delim'=',',
'mapkey.delim'=':',
'escape.delim'='|',
'serialization.null.format'='null'
);
"""
sql """
INSERT INTO text_table_different_properties VALUES
(1, 'Alice', array('tag1', 'tag2'), map('key1', 'value1', 'key2', 'value2')),
(2, 'Bob', array('tagA', 'tagB'), map('keyA', 'valueA', 'keyB', 'valueB')),
(3, 'Charlie', NULL, map('keyC', 'valueC', 'keyD', 'valueD'));
"""
order_qt_different_properties """ select * from text_table_different_properties """
order_qt_hive_docker_different_properties """ select * from text_table_different_properties order by id; """

String serde = "'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'"
String input_format = "'org.apache.hadoop.mapred.TextInputFormat'"
String output_format = "'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'"
String doris_fileformat = "'doris.file_format'='text'"
String filed_delim = "'field.delim'"
String line_delim = "'line.delim'"
String mapkey_delim = "'mapkey.delim'"
String serde = "'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'"
String input_format = "'org.apache.hadoop.mapred.TextInputFormat'"
String output_format = "'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'"
String doris_fileformat = "'doris.file_format'='text'"
String filed_delim = "'field.delim'"
String line_delim = "'line.delim'"
String mapkey_delim = "'mapkey.delim'"
String collection_delim = "'collection.delim'"
String escape_delim = "'escape.delim'"
String serialization_null_format = "'serialization.null.format'"

def create_tbl_res = sql """ show create table tb_text """
String res = create_tbl_res.toString()
logger.info("${res}")
assertTrue(res.containsIgnoreCase("${serde}"))
assertTrue(res.containsIgnoreCase("${input_format}"))
assertTrue(res.containsIgnoreCase("${output_format}"))
assertTrue(res.containsIgnoreCase("${doris_fileformat}"))
assertTrue(res.containsIgnoreCase("${filed_delim}"))
assertTrue(res.containsIgnoreCase("${filed_delim}"))
assertTrue(res.containsIgnoreCase("${line_delim}"))
assertTrue(res.containsIgnoreCase("${mapkey_delim}"))
// Check that SHOW CREATE TABLE on the table created with explicit text properties
// echoes back the serde, the input/output formats, the Doris file-format marker and
// every delimiter / null-format property key (matched case-insensitively).
def create_tbl_res = sql """ show create table text_table_standard_properties """
String res = create_tbl_res.toString()
logger.info("${res}")
assertTrue(res.containsIgnoreCase("${serde}"))
assertTrue(res.containsIgnoreCase("${input_format}"))
assertTrue(res.containsIgnoreCase("${output_format}"))
assertTrue(res.containsIgnoreCase("${doris_fileformat}"))
// Each property key is asserted exactly once.
assertTrue(res.containsIgnoreCase("${filed_delim}"))
assertTrue(res.containsIgnoreCase("${line_delim}"))
assertTrue(res.containsIgnoreCase("${mapkey_delim}"))
assertTrue(res.containsIgnoreCase("${collection_delim}"))
assertTrue(res.containsIgnoreCase("${escape_delim}"))
assertTrue(res.containsIgnoreCase("${serialization_null_format}"))
} finally {
}
}
}