diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 582e10b1c372..6ee10afc8b07 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -289,7 +289,7 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const } -const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const +const ColumnWithTypeAndName * Block::findByName(std::string_view name, bool case_insensitive) const { if (case_insensitive) { @@ -309,6 +309,11 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool c return &data[it->second]; } +const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const +{ + return findByName(std::string_view{name}, case_insensitive); /* std::string overload only forwards to the string_view overload */ +} + std::optional Block::findSubcolumnByName(const std::string & name) const { auto [name_in_storage, subcolumn_name] = Nested::splitName(name); diff --git a/src/Core/Block.h b/src/Core/Block.h index ae907c5ff624..8b11234b409b 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -5,6 +5,8 @@ #include #include +#include + #include #include @@ -30,7 +32,7 @@ class Block { private: using Container = ColumnsWithTypeAndName; - using IndexByName = std::unordered_map; + using IndexByName = std::unordered_map; Container data; IndexByName index_by_name; @@ -70,6 +72,13 @@ class Block const_cast(this)->findByName(name, case_insensitive)); } + ColumnWithTypeAndName * findByName(std::string_view name, bool case_insensitive = false) /* non-const overload: forwards to the const string_view overload and casts constness away from the result */ + { + return const_cast( + const_cast(this)->findByName(name, case_insensitive)); + } + + const ColumnWithTypeAndName * findByName(std::string_view name, bool case_insensitive) const; /* NOTE(review): unlike the sibling overloads, case_insensitive has no default here, so const_block.findByName(some_string_view) does not compile. Adding "= false" would instead make a string-literal argument on a const Block ambiguous between the std::string and std::string_view overloads - confirm which behaviour is intended. */ const ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false) const; std::optional findSubcolumnByName(const std::string & name) const; std::optional findColumnOrSubcolumnByName(const std::string & name) const; diff --git 
a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index dd03f200d81c..3ba49411cd7f 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -54,6 +54,8 @@ #include #include +#include + namespace DB { @@ -144,26 +146,26 @@ NameSet getVirtualNamesForFileLikeStorage() return getCommonVirtualsForFileLikeStorage().getNameSet(); } -static std::unordered_map parseHivePartitioningKeysAndValues(const String & path) +using HivePartitioningKeysAndValues = absl::flat_hash_map; +static HivePartitioningKeysAndValues parseHivePartitioningKeysAndValues(const String & path) /* Collects every "key=value/" segment of path; throws INCORRECT_DATA when a key repeats with a different value. NOTE(review): key and value are captured as std::string_view, so the returned map presumably holds views into path (its template arguments are not visible here) - if so, path must outlive the result; confirm. */ { - std::string pattern = "([^/]+)=([^/]+)/"; + const static RE2 pattern_re("([^/]+)=([^/]*)/"); re2::StringPiece input_piece(path); - std::unordered_map key_values; - std::string key; - std::string value; - std::unordered_map used_keys; - while (RE2::FindAndConsume(&input_piece, pattern, &key, &value)) + HivePartitioningKeysAndValues result; + std::string_view key; + std::string_view value; + + while (RE2::FindAndConsume(&input_piece, pattern_re, &key, &value)) { - auto it = used_keys.find(key); - if (it != used_keys.end() && it->second != value) + auto it = result.find(key); + if (it != result.end() && it->second != value) throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {} with different values, only unique keys are allowed", path, key); - used_keys.insert({key, value}); auto col_name = key; - key_values[col_name] = value; + result[col_name] = value; } - return key_values; + return result; } VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional format_settings_) @@ -199,13 +201,15 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto auto format_settings = format_settings_ ? 
*format_settings_ : getFormatSettings(context); for (auto & item : map) { - auto type = tryInferDataTypeByEscapingRule(item.second, format_settings, FormatSettings::EscapingRule::Raw); + const std::string key(item.first); /* partition key: becomes the virtual column name, as item.first did before this patch */ + const std::string value(item.second); /* partition value: used only to infer the column type */ + auto type = tryInferDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Raw); if (type == nullptr) type = std::make_shared(); if (type->canBeInsideLowCardinality()) - add_virtual({item.first, std::make_shared(type)}, true); + add_virtual({key, std::make_shared(type)}, true); else - add_virtual({item.first, type}, true); + add_virtual({key, type}, true); } } @@ -231,7 +235,7 @@ static void addPathAndFileToVirtualColumns(Block & block, const String & path, s if (use_hive_partitioning) { - auto keys_and_values = parseHivePartitioningKeysAndValues(path); + const auto keys_and_values = parseHivePartitioningKeysAndValues(path); for (const auto & [key, value] : keys_and_values) { if (const auto * column = block.findByName(key)) @@ -285,7 +289,7 @@ void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, VirtualsForFileLikeStorage virtual_values, ContextPtr context) { - std::unordered_map hive_map; + HivePartitioningKeysAndValues hive_map; if (context->getSettingsRef()[Setting::use_hive_partitioning]) hive_map = parseHivePartitioningKeysAndValues(virtual_values.path);