Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/Core/Block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const
}


const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const
const ColumnWithTypeAndName * Block::findByName(const std::string_view & name, bool case_insensitive) const
{
if (case_insensitive)
{
Expand All @@ -310,6 +310,11 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool c
return &data[it->second];
}

/// Overload for `std::string` names: wraps `name` in a `std::string_view`
/// and delegates to the `std::string_view` overload of `findByName`,
/// returning whatever that overload returns.
const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const
{
    const std::string_view name_view{name};
    return findByName(name_view, case_insensitive);
}

std::optional<ColumnWithTypeAndName> Block::findSubcolumnByName(const std::string & name) const
{
auto [name_in_storage, subcolumn_name] = Nested::splitName(name);
Expand Down
11 changes: 10 additions & 1 deletion src/Core/Block.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <initializer_list>
#include <vector>
#include <Common/StringHashForHeterogeneousLookup.h>


class SipHash;
Expand All @@ -30,7 +31,7 @@ class Block
{
private:
using Container = ColumnsWithTypeAndName;
using IndexByName = std::unordered_map<String, size_t>;
using IndexByName = std::unordered_map<String, size_t, StringHashForHeterogeneousLookup, StringHashForHeterogeneousLookup::transparent_key_equal>;

Container data;
IndexByName index_by_name;
Expand Down Expand Up @@ -70,6 +71,14 @@ class Block
const_cast<const Block *>(this)->findByName(name, case_insensitive));
}

/// Mutable counterpart of the const `std::string_view` lookup.
/// Forwards to the const overload and strips constness from the returned pointer.
ColumnWithTypeAndName * findByName(const std::string_view & name, bool case_insensitive = false)
{
    const Block & self = *this;
    const ColumnWithTypeAndName * found = self.findByName(name, case_insensitive);
    return const_cast<ColumnWithTypeAndName *>(found);
}

const ColumnWithTypeAndName * findByName(const std::string_view & name, bool case_insensitive) const;

const ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false) const;
std::optional<ColumnWithTypeAndName> findSubcolumnByName(const std::string & name) const;
std::optional<ColumnWithTypeAndName> findColumnOrSubcolumnByName(const std::string & name) const;
Expand Down
19 changes: 10 additions & 9 deletions src/Functions/keyvaluepair/extractKeyValuePairs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

#include <Interpreters/Context.h>

#include <Functions/keyvaluepair/impl/KeyValuePairExtractor.h>
#include <Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.h>
#include <Functions/keyvaluepair/ArgumentExtractor.h>

Expand All @@ -29,11 +28,6 @@ class ExtractKeyValuePairs : public IFunction
{
auto builder = KeyValuePairExtractorBuilder();

if constexpr (WITH_ESCAPING)
{
builder.withEscaping();
}

if (parsed_arguments.key_value_delimiter)
{
builder.withKeyValueDelimiter(parsed_arguments.key_value_delimiter.value());
Expand All @@ -56,10 +50,17 @@ class ExtractKeyValuePairs : public IFunction
builder.withMaxNumberOfPairs(context->getSettingsRef()[Setting::extract_key_value_pairs_max_pairs_per_row]);
}

return builder.build();
if constexpr (WITH_ESCAPING)
{
return builder.buildWithEscaping();
}
else
{
return builder.buildWithoutEscaping();
}
}

ColumnPtr extract(ColumnPtr data_column, std::shared_ptr<KeyValuePairExtractor> extractor, size_t input_rows_count) const
ColumnPtr extract(ColumnPtr data_column, auto & extractor, size_t input_rows_count) const
{
auto offsets = ColumnUInt64::create();

Expand All @@ -72,7 +73,7 @@ class ExtractKeyValuePairs : public IFunction
{
auto row = data_column->getDataAt(i).toView();

auto pairs_count = extractor->extract(row, keys, values);
auto pairs_count = extractor.extract(row, keys, values);

offset += pairs_count;

Expand Down
93 changes: 66 additions & 27 deletions src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
#include <Columns/ColumnsNumber.h>

#include <Functions/keyvaluepair/impl/StateHandler.h>
#include <Functions/keyvaluepair/impl/KeyValuePairExtractor.h>
#include <Functions/keyvaluepair/impl/StateHandlerImpl.h>
#include <absl/container/flat_hash_map.h>

namespace DB
{
Expand All @@ -16,37 +17,36 @@ namespace ErrorCodes
extern const int LIMIT_EXCEEDED;
}

namespace extractKV
{
/*
* Handle state transitions and a few states like `FLUSH_PAIR` and `END`.
* */
template <typename StateHandler>
class CHKeyValuePairExtractor : public KeyValuePairExtractor
class KeyValuePairExtractor
{
using State = typename DB::extractKV::StateHandler::State;
using NextState = DB::extractKV::StateHandler::NextState;

public:
explicit CHKeyValuePairExtractor(StateHandler state_handler_, uint64_t max_number_of_pairs_)
: state_handler(std::move(state_handler_)), max_number_of_pairs(max_number_of_pairs_)
{}
using PairWriter = typename StateHandler::PairWriter;

uint64_t extract(const std::string & data, ColumnString::MutablePtr & keys, ColumnString::MutablePtr & values) override
KeyValuePairExtractor(const Configuration & configuration_, uint64_t max_number_of_pairs_)
: state_handler(StateHandler(configuration_))
, max_number_of_pairs(max_number_of_pairs_)
{
return extract(std::string_view {data}, keys, values);
}

uint64_t extract(std::string_view data, ColumnString::MutablePtr & keys, ColumnString::MutablePtr & values) override
protected:
uint64_t extractImpl(std::string_view data, typename StateHandler::PairWriter & pair_writer)
{
auto state = State::WAITING_KEY;

auto key = typename StateHandler::StringWriter(*keys);
auto value = typename StateHandler::StringWriter(*values);

uint64_t row_offset = 0;

while (state != State::END)
{
auto next_state = processState(data, state, key, value, row_offset);
auto next_state = processState(data, state, pair_writer, row_offset);

if (next_state.position_in_string > data.size() && next_state.state != State::END)
{
Expand All @@ -61,14 +61,13 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
}

// below reset discards invalid keys and values
reset(key, value);
reset(pair_writer);

return row_offset;
}

private:

NextState processState(std::string_view file, State state, auto & key, auto & value, uint64_t & row_offset)
NextState processState(std::string_view file, State state, auto & pair_writer, uint64_t & row_offset)
{
switch (state)
{
Expand All @@ -78,11 +77,11 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
}
case State::READING_KEY:
{
return state_handler.readKey(file, key);
return state_handler.readKey(file, pair_writer);
}
case State::READING_QUOTED_KEY:
{
return state_handler.readQuotedKey(file, key);
return state_handler.readQuotedKey(file, pair_writer);
}
case State::READING_KV_DELIMITER:
{
Expand All @@ -94,15 +93,15 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
}
case State::READING_VALUE:
{
return state_handler.readValue(file, value);
return state_handler.readValue(file, pair_writer);
}
case State::READING_QUOTED_VALUE:
{
return state_handler.readQuotedValue(file, value);
return state_handler.readQuotedValue(file, pair_writer);
}
case State::FLUSH_PAIR:
{
return flushPair(file, key, value, row_offset);
return flushPair(file, pair_writer, row_offset);
}
case State::END:
{
Expand All @@ -111,8 +110,7 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
}
}

NextState flushPair(const std::string_view & file, auto & key,
auto & value, uint64_t & row_offset)
NextState flushPair(const std::string_view & file, auto & pair_writer, uint64_t & row_offset)
{
row_offset++;

Expand All @@ -121,20 +119,61 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
throw Exception(ErrorCodes::LIMIT_EXCEEDED, "Number of pairs produced exceeded the limit of {}", max_number_of_pairs);
}

key.commit();
value.commit();
pair_writer.commitKey();
pair_writer.commitValue();

return {0, file.empty() ? State::END : State::WAITING_KEY};
}

void reset(auto & key, auto & value)
void reset(auto & pair_writer)
{
key.reset();
value.reset();
pair_writer.resetKey();
pair_writer.resetValue();
}

StateHandler state_handler;
uint64_t max_number_of_pairs;
};

}

/// Key-value pair extractor bound to `extractKV::NoEscapingStateHandler`.
struct KeyValuePairExtractorNoEscaping : extractKV::KeyValuePairExtractor<extractKV::NoEscapingStateHandler>
{
    using StateHandler = extractKV::NoEscapingStateHandler;

    /// Passes the configuration and the pair limit straight to the base extractor.
    explicit KeyValuePairExtractorNoEscaping(const extractKV::Configuration & configuration_, std::size_t max_number_of_pairs_)
        : KeyValuePairExtractor(configuration_, max_number_of_pairs_)
    {
    }

    /// Builds a `PairWriter` over the `keys` and `values` columns, runs `extractImpl`
    /// on `data` with it and returns the result of `extractImpl`.
    uint64_t extract(std::string_view data, ColumnString::MutablePtr & keys, ColumnString::MutablePtr & values)
    {
        auto writer = StateHandler::PairWriter(*keys, *values);
        return extractImpl(data, writer);
    }
};

/// Key-value pair extractor bound to `extractKV::InlineEscapingStateHandler`.
struct KeyValuePairExtractorInlineEscaping : extractKV::KeyValuePairExtractor<extractKV::InlineEscapingStateHandler>
{
    using StateHandler = extractKV::InlineEscapingStateHandler;

    /// Passes the configuration and the pair limit straight to the base extractor.
    explicit KeyValuePairExtractorInlineEscaping(const extractKV::Configuration & configuration_, std::size_t max_number_of_pairs_)
        : KeyValuePairExtractor(configuration_, max_number_of_pairs_)
    {
    }

    /// Builds a `PairWriter` over the `keys` and `values` columns, runs `extractImpl`
    /// on `data` with it and returns the result of `extractImpl`.
    uint64_t extract(std::string_view data, ColumnString::MutablePtr & keys, ColumnString::MutablePtr & values)
    {
        auto writer = StateHandler::PairWriter(*keys, *values);
        return extractImpl(data, writer);
    }
};

/// Key-value pair extractor bound to `extractKV::ReferencesMapStateHandler`.
/// Instead of string columns, its `extract` writes into a map of `std::string_view`s.
struct KeyValuePairExtractorReferenceMap : extractKV::KeyValuePairExtractor<extractKV::ReferencesMapStateHandler>
{
    using StateHandler = extractKV::ReferencesMapStateHandler;

    /// Passes the configuration and the pair limit straight to the base extractor.
    explicit KeyValuePairExtractorReferenceMap(const extractKV::Configuration & configuration_, std::size_t max_number_of_pairs_)
        : KeyValuePairExtractor(configuration_, max_number_of_pairs_)
    {
    }

    /// Builds a `PairWriter` over `map`, runs `extractImpl` on `data` with it
    /// and returns the result of `extractImpl`.
    /// NOTE(review): the map holds non-owning views, presumably into `data` -
    /// confirm that callers keep the underlying buffer alive while using `map`.
    uint64_t extract(std::string_view data, absl::flat_hash_map<std::string_view, std::string_view> & map)
    {
        auto writer = StateHandler::PairWriter(map);
        return extractImpl(data, writer);
    }
};

}
20 changes: 20 additions & 0 deletions src/Functions/keyvaluepair/impl/DuplicateKeyFoundException.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#pragma once

#include <Common/Exception.h>

namespace DB
{

namespace extractKV
{

/// Exception type carrying the key that was found to be duplicated.
struct DuplicateKeyFoundException : Exception
{
    /// Non-owning view of the offending key.
    /// NOTE(review): the viewed characters must outlive this exception object -
    /// confirm that throw sites pass a view into storage that stays alive until the catch.
    std::string_view key;

    explicit DuplicateKeyFoundException(std::string_view key_)
        : key{key_}
    {
    }
};

}

}
20 changes: 0 additions & 20 deletions src/Functions/keyvaluepair/impl/KeyValuePairExtractor.h

This file was deleted.

44 changes: 0 additions & 44 deletions src/Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#include <Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.h>

#include <Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h>
#include <Functions/keyvaluepair/impl/Configuration.h>
#include <Functions/keyvaluepair/impl/StateHandlerImpl.h>

namespace DB
Expand All @@ -25,52 +23,10 @@ KeyValuePairExtractorBuilder & KeyValuePairExtractorBuilder::withQuotingCharacte
return *this;
}

KeyValuePairExtractorBuilder & KeyValuePairExtractorBuilder::withEscaping()
{
with_escaping = true;
return *this;
}

/// Stores `max_number_of_pairs_` in the builder's `max_number_of_pairs`
/// and returns `*this` so that builder calls can be chained.
KeyValuePairExtractorBuilder & KeyValuePairExtractorBuilder::withMaxNumberOfPairs(uint64_t max_number_of_pairs_)
{
    this->max_number_of_pairs = max_number_of_pairs_;
    return *this;
}

std::shared_ptr<KeyValuePairExtractor> KeyValuePairExtractorBuilder::build() const
{
if (with_escaping)
{
return buildWithEscaping();
}

return buildWithoutEscaping();
}

namespace
{
using namespace extractKV;

template <typename T>
auto makeStateHandler(const T && handler, uint64_t max_number_of_pairs)
{
return std::make_shared<CHKeyValuePairExtractor<T>>(handler, max_number_of_pairs);
}

}

std::shared_ptr<KeyValuePairExtractor> KeyValuePairExtractorBuilder::buildWithoutEscaping() const
{
auto configuration = ConfigurationFactory::createWithoutEscaping(key_value_delimiter, quoting_character, item_delimiters);

return makeStateHandler(NoEscapingStateHandler(configuration), max_number_of_pairs);
}

std::shared_ptr<KeyValuePairExtractor> KeyValuePairExtractorBuilder::buildWithEscaping() const
{
auto configuration = ConfigurationFactory::createWithEscaping(key_value_delimiter, quoting_character, item_delimiters);

return makeStateHandler(InlineEscapingStateHandler(configuration), max_number_of_pairs);
}

}
Loading
Loading