From 1f8faa7f1b2337fd18c0425ad0605e1896115b11 Mon Sep 17 00:00:00 2001 From: Socrates Date: Tue, 11 Feb 2025 12:02:44 +0800 Subject: [PATCH] [fix](orc) remove unnecessary fields of orc_reader (#47506) remove unnecessary fields of orc_reader: - remove `_col_name_to_file_col_name_low_case` by storing original field name in `type_map` - add comment to describe the functionality of these mappings --- be/src/vec/exec/format/orc/vorc_reader.cpp | 4 ++-- be/src/vec/exec/format/orc/vorc_reader.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 891ec81e992fef..293e6e9b98f9db 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -403,9 +403,9 @@ void OrcReader::_init_orc_cols(const orc::Type& type, std::vector& if (hive1_orc) { hive1_orc = _is_hive1_col_name(filed_name_lower_case); } - auto filed_name_lower_case_copy = filed_name_lower_case; orc_cols_lower_case.emplace_back(std::move(filed_name_lower_case)); - type_map.emplace(std::move(filed_name_lower_case_copy), type.getSubtype(i)); + auto file_name = type.getFieldName(i); + type_map.emplace(std::move(file_name), type.getSubtype(i)); if (_is_acid) { const orc::Type* sub_type = type.getSubtype(i); if (sub_type->getKind() == orc::TypeKind::STRUCT) { diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index dc9d565e8031c3..0f09d4e7e28837 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -576,7 +576,9 @@ class OrcReader : public GenericReader { // 2. If true, use indexes instead of column names when reading orc tables. 
bool _is_hive1_orc_or_use_idx = false; + // map col name in metastore to col name in orc file std::unordered_map _col_name_to_file_col_name; + // map col name in orc file to orc type std::unordered_map _type_map; std::vector _col_orc_type; std::unique_ptr _file_input_stream;