Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion be/src/vec/columns/column_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ size_t getNumberOfDimensions(const IDataType& type) {
DataTypePtr get_data_type_by_column(const IColumn& column) {
auto idx = column.get_data_type();
if (WhichDataType(idx).is_simple()) {
return DataTypeFactory::instance().get(String(getTypeName(idx)));
return DataTypeFactory::instance().create_data_type(idx);
}
if (WhichDataType(idx).is_nothing()) {
return std::make_shared<DataTypeNothing>();
Expand Down
3 changes: 2 additions & 1 deletion be/src/vec/columns/column_struct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ void ColumnStruct::insert(const Field& x) {
const auto& tuple = x.get<const Tuple&>();
const size_t tuple_size = columns.size();
if (tuple.size() != tuple_size) {
LOG(FATAL) << "Cannot insert value of different size into tuple.";
// Report both sizes so that the mismatch can be diagnosed from the log line alone.
// The trailing space after "size" keeps the number from being glued to the text.
LOG(FATAL) << "Cannot insert value of different size into tuple. field tuple size "
           << tuple.size() << ", columns size " << tuple_size;
}

for (size_t i = 0; i < tuple_size; ++i) {
Expand Down
6 changes: 6 additions & 0 deletions be/src/vec/columns/column_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,12 @@ class ColumnVector final : public COWHelper<ColumnVectorHelper, ColumnVector<T>>

Int64 get_int(size_t n) const override { return Int64(data[n]); }

// The incoming Field stores its value as NearestFieldType<T>, which can be wider than the
// column's element type T. Example: creating column_const(1, uint8) widens the uint8 to an
// int64 Field via NearestFieldType, while the column itself is built from the data type and
// therefore has T == uint8. Hence the value has to be read back from the Field as
// NearestFieldType<T> first and is only then stored as a T in the column.
void insert(const Field& x) override {
    const auto nearest_value = doris::vectorized::get<NearestFieldType<T>>(x);
    data.push_back(nearest_value);
}
Expand Down
86 changes: 44 additions & 42 deletions be/src/vec/core/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,50 +45,52 @@ namespace vectorized {

struct Null {};

// Every identifier must fit in an int16: the cast expression passes the identifier
// (instead of the type name) as its type parameter, and carries it in an Int16 column.
enum class TypeIndex {
Nothing = 0,
UInt8,
UInt16,
UInt32,
UInt64,
UInt128,
Int8,
Int16,
Int32,
Int64,
Int128,
Float32,
Float64,
Date,
DateTime,
String,
FixedString,
Enum8,
Enum16,
Decimal32,
Decimal64,
Decimal128,
UUID,
Array,
Tuple,
Set,
Interval,
Nullable,
Function,
AggregateFunction,
LowCardinality,
BitMap,
HLL,
DateV2,
DateTimeV2,
TimeV2,
FixedLengthObject,
JSONB,
Decimal128I,
Map,
Struct,
VARIANT,
QuantileState,
UInt8 = 1,
UInt16 = 2,
UInt32 = 3,
UInt64 = 4,
UInt128 = 5,
Int8 = 6,
Int16 = 7,
Int32 = 8,
Int64 = 9,
Int128 = 10,
Float32 = 11,
Float64 = 12,
Date = 13,
DateTime = 14,
String = 15,
FixedString = 16,
Enum8 = 17,
Enum16 = 18,
Decimal32 = 19,
Decimal64 = 20,
Decimal128 = 21,
UUID = 22,
Array = 23,
Tuple = 24,
Set = 25,
Interval = 26,
Nullable = 27,
Function = 28,
AggregateFunction = 29,
LowCardinality = 30,
BitMap = 31,
HLL = 32,
DateV2 = 33,
DateTimeV2 = 34,
TimeV2 = 35,
FixedLengthObject = 36,
JSONB = 37,
Decimal128I = 38,
Map = 39,
Struct = 40,
VARIANT = 41,
QuantileState = 42,
};

struct Consted {
Expand Down
91 changes: 91 additions & 0 deletions be/src/vec/data_types/data_type_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,97 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo
return nested;
}

// Creates the default data type for a bare TypeIndex. No precision/scale is available
// from the index alone, so decimal types are created with their maximum precision and
// scale 0. An index without a mapping hits DCHECK(false) and the function returns nullptr.
// When `is_nullable` is set, a successfully created type is wrapped in DataTypeNullable.
DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool is_nullable) {
    const auto make_base_type = [&type_index]() -> DataTypePtr {
        switch (type_index) {
        // Integer types.
        case TypeIndex::UInt8:
            return std::make_shared<vectorized::DataTypeUInt8>();
        case TypeIndex::UInt16:
            return std::make_shared<vectorized::DataTypeUInt16>();
        case TypeIndex::UInt32:
            return std::make_shared<vectorized::DataTypeUInt32>();
        case TypeIndex::UInt64:
            return std::make_shared<vectorized::DataTypeUInt64>();
        case TypeIndex::Int8:
            return std::make_shared<vectorized::DataTypeInt8>();
        case TypeIndex::Int16:
            return std::make_shared<vectorized::DataTypeInt16>();
        case TypeIndex::Int32:
            return std::make_shared<vectorized::DataTypeInt32>();
        case TypeIndex::Int64:
            return std::make_shared<vectorized::DataTypeInt64>();
        case TypeIndex::Int128:
            return std::make_shared<vectorized::DataTypeInt128>();
        // Floating point types.
        case TypeIndex::Float32:
            return std::make_shared<vectorized::DataTypeFloat32>();
        case TypeIndex::Float64:
            return std::make_shared<vectorized::DataTypeFloat64>();
        // Date and time types.
        case TypeIndex::Date:
            return std::make_shared<vectorized::DataTypeDate>();
        case TypeIndex::DateTime:
            return std::make_shared<vectorized::DataTypeDateTime>();
        case TypeIndex::DateV2:
            return std::make_shared<vectorized::DataTypeDateV2>();
        case TypeIndex::DateTimeV2:
            return std::make_shared<DataTypeDateTimeV2>();
        case TypeIndex::TimeV2:
            return std::make_shared<vectorized::DataTypeTime>();
        // Strings and JSON.
        case TypeIndex::String:
            return std::make_shared<vectorized::DataTypeString>();
        case TypeIndex::JSONB:
            return std::make_shared<vectorized::DataTypeJsonb>();
        // Decimals: maximum precision, scale 0.
        case TypeIndex::Decimal32:
            return std::make_shared<DataTypeDecimal<Decimal32>>(
                    BeConsts::MAX_DECIMAL32_PRECISION, 0);
        case TypeIndex::Decimal64:
            return std::make_shared<DataTypeDecimal<Decimal64>>(
                    BeConsts::MAX_DECIMAL64_PRECISION, 0);
        case TypeIndex::Decimal128:
            return std::make_shared<DataTypeDecimal<Decimal128>>(
                    BeConsts::MAX_DECIMAL128_PRECISION, 0);
        case TypeIndex::Decimal128I:
            return std::make_shared<DataTypeDecimal<Decimal128I>>(
                    BeConsts::MAX_DECIMAL128_PRECISION, 0);
        // Aggregate-state object types.
        case TypeIndex::BitMap:
            return std::make_shared<vectorized::DataTypeBitMap>();
        case TypeIndex::HLL:
            return std::make_shared<vectorized::DataTypeHLL>();
        case TypeIndex::QuantileState:
            return std::make_shared<vectorized::DataTypeQuantileStateDouble>();
        default:
            DCHECK(false) << "invalid typeindex:" << static_cast<int16_t>(type_index);
            return nullptr;
        }
    };

    DataTypePtr base_type = make_base_type();
    // Nothing to wrap when creation failed or nullability was not requested.
    if (!base_type || !is_nullable) {
        return base_type;
    }
    return std::make_shared<vectorized::DataTypeNullable>(base_type);
}

DataTypePtr DataTypeFactory::_create_primitive_data_type(const FieldType& type, int precision,
int scale) const {
DataTypePtr result = nullptr;
Expand Down
121 changes: 1 addition & 120 deletions be/src/vec/data_types/data_type_factory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,124 +54,12 @@ class DataTypeFactory {

public:
// Returns the factory held in a function-local static. On the first call, std::call_once
// registers every base type listed below under its name, together with the derived
// "Array(<name>)" and "Array(Nullable(<name>))" entries.
static DataTypeFactory& instance() {
static std::once_flag oc;
static DataTypeFactory instance;
std::call_once(oc, []() {
// Name -> prototype instance of each base type. "Decimal" is Decimal128 with
// precision 27 / scale 9; the other decimal prototypes use their maximum precision
// and scale 0.
std::unordered_map<std::string, DataTypePtr> base_type_map {
{"UInt8", std::make_shared<DataTypeUInt8>()},
{"UInt16", std::make_shared<DataTypeUInt16>()},
{"UInt32", std::make_shared<DataTypeUInt32>()},
{"UInt64", std::make_shared<DataTypeUInt64>()},
{"Int8", std::make_shared<DataTypeInt8>()},
{"Int16", std::make_shared<DataTypeInt16>()},
{"Int32", std::make_shared<DataTypeInt32>()},
{"Int64", std::make_shared<DataTypeInt64>()},
{"Int128", std::make_shared<DataTypeInt128>()},
{"Float32", std::make_shared<DataTypeFloat32>()},
{"Float64", std::make_shared<DataTypeFloat64>()},
{"Date", std::make_shared<DataTypeDate>()},
{"DateV2", std::make_shared<DataTypeDateV2>()},
{"DateTime", std::make_shared<DataTypeDateTime>()},
{"DateTimeV2", std::make_shared<DataTypeDateTimeV2>()},
{"String", std::make_shared<DataTypeString>()},
{"Decimal", std::make_shared<DataTypeDecimal<Decimal128>>(27, 9)},
{"Decimal32", std::make_shared<DataTypeDecimal<Decimal32>>(
BeConsts::MAX_DECIMAL32_PRECISION, 0)},
{"Decimal64", std::make_shared<DataTypeDecimal<Decimal64>>(
BeConsts::MAX_DECIMAL64_PRECISION, 0)},
{"Decimal128", std::make_shared<DataTypeDecimal<Decimal128>>(
BeConsts::MAX_DECIMAL128_PRECISION, 0)},
{"Decimal128I", std::make_shared<DataTypeDecimal<Decimal128I>>(
BeConsts::MAX_DECIMAL128_PRECISION, 0)},
{"Jsonb", std::make_shared<DataTypeJsonb>()},
{"BitMap", std::make_shared<DataTypeBitMap>()},
{"Hll", std::make_shared<DataTypeHLL>()},
{"QuantileState", std::make_shared<DataTypeQuantileStateDouble>()},
};
// NOTE(review): the registration order follows the unordered_map's iteration order,
// and the reverse lookup returns the first matching entry — confirm that no caller
// depends on which of several matching names (e.g. "Decimal" vs "Decimal128") wins.
for (auto const& [key, val] : base_type_map) {
instance.register_data_type(key, val);
instance.register_data_type("Array(" + key + ")",
std::make_shared<vectorized::DataTypeArray>(val));
instance.register_data_type(
"Array(Nullable(" + key + "))",
std::make_shared<vectorized::DataTypeArray>(
std::make_shared<vectorized::DataTypeNullable>(val)));
}
});
return instance;
}

// TODO(xy): support creator to create dynamic struct type
// Looks up a registered data type by name and returns nullptr when the name is unknown.
// find() is used instead of operator[] so that a miss does not insert an empty entry
// into the registry.
DataTypePtr get(const std::string& name) {
    const auto it = _data_type_map.find(name);
    if (it == _data_type_map.end()) {
        return nullptr;
    }
    return it->second;
}
// TODO(xy): support creator to create dynamic struct type
// Reverse lookup: returns the name registered for a type matching `data_type`, or a
// reference to an empty string when nothing matches. A Nullable wrapper around
// `data_type` is stripped before matching. Struct/Map types (and arrays of them) that are
// not found are registered through DataTypeFactory::instance() and looked up again.
// NOTE(review): the retry loops scan this object's own list while registration goes
// through instance() — presumably `this` is always the singleton; confirm.
// NOTE(review): the result refers to an element of _invert_data_type_map; whether it
// stays valid across later registrations depends on that container's type — confirm.
const std::string& get(const DataTypePtr& data_type) const {
auto type_ptr = data_type->is_nullable()
? ((DataTypeNullable*)(data_type.get()))->get_nested_type()
: data_type;
// Scan the registered types in registration order; the first entry that matches wins.
for (const auto& entity : _invert_data_type_map) {
// Rule 1: the registered type equals the requested one.
if (entity.first->equals(*type_ptr)) {
return entity.second;
}
// Rule 2: decimals are matched on type id alone.
if (is_decimal(type_ptr) && type_ptr->get_type_id() == entity.first->get_type_id()) {
return entity.second;
}
// Rule 3: arrays whose nullable element type is a decimal with the same type id.
if (is_array(type_ptr) && is_array(entity.first)) {
auto nested_nullable_type_ptr =
(assert_cast<const DataTypeArray*>(type_ptr.get()))->get_nested_type();
auto nested_nullable_entity_ptr =
(assert_cast<const DataTypeArray*>(entity.first.get()))->get_nested_type();
// There must be nullable inside array type.
if (nested_nullable_type_ptr->is_nullable() &&
nested_nullable_entity_ptr->is_nullable()) {
auto nested_type_ptr = ((DataTypeNullable*)(nested_nullable_type_ptr.get()))
->get_nested_type();
auto nested_entity_ptr = ((DataTypeNullable*)(nested_nullable_entity_ptr.get()))
->get_nested_type();
if (is_decimal(nested_type_ptr) &&
nested_type_ptr->get_type_id() == nested_entity_ptr->get_type_id()) {
return entity.second;
}
}
}
}
// No match so far: Struct and Map types are registered under their own name on demand,
// then the list is scanned once more for an equal type.
if (type_ptr->get_type_id() == TypeIndex::Struct ||
type_ptr->get_type_id() == TypeIndex::Map) {
DataTypeFactory::instance().register_data_type(type_ptr->get_name(), type_ptr);
for (const auto& entity : _invert_data_type_map) {
if (entity.first->equals(*type_ptr)) {
return entity.second;
}
}
} else if (type_ptr->get_type_id() == TypeIndex::Array) {
// register the Array<Struct<>>/Array<Map<>>
auto nested_type = ((DataTypeArray*)type_ptr.get())->get_nested_type();
// Look through a Nullable wrapper on the array's element type.
nested_type = nested_type->is_nullable()
? ((DataTypeNullable*)(nested_type.get()))->get_nested_type()
: nested_type;

if (nested_type->get_type_id() == TypeIndex::Struct ||
nested_type->get_type_id() == TypeIndex::Map) {
// Register the element type plus its Array(...) and Array(Nullable(...)) forms.
auto key = nested_type->get_name();
auto val = nested_type;
DataTypeFactory::instance().register_data_type(key, val);
DataTypeFactory::instance().register_data_type(
"Array(" + key + ")", std::make_shared<vectorized::DataTypeArray>(val));
DataTypeFactory::instance().register_data_type(
"Array(Nullable(" + key + "))",
std::make_shared<vectorized::DataTypeArray>(
std::make_shared<vectorized::DataTypeNullable>(val)));
}

for (const auto& entity : _invert_data_type_map) {
if (entity.first->equals(*type_ptr)) {
return entity.second;
}
}
}
// Nothing registered matches the requested type.
return _empty_string;
}

DataTypePtr create_data_type(const doris::Field& col_desc);
DataTypePtr create_data_type(const TypeIndex& type_index, bool is_nullable = false);
DataTypePtr create_data_type(const TabletColumn& col_desc, bool is_nullable = false);

DataTypePtr create_data_type(const TypeDescriptor& col_desc, bool is_nullable = true);
Expand All @@ -191,13 +79,6 @@ class DataTypeFactory {
private:
DataTypePtr _create_primitive_data_type(const FieldType& type, int precision, int scale) const;

// Records `data_type` under `name` in the name -> type map and appends the
// (type, name) pair to the inverted list that the reverse lookup scans.
// NOTE(review): presumably emplace leaves an already registered `name` untouched while
// the inverted list grows on every call — confirm against the container types.
void register_data_type(const std::string& name, const DataTypePtr& data_type) {
_data_type_map.emplace(name, data_type);
_invert_data_type_map.emplace_back(data_type, name);
}
// TODO: this map + vector pair is a workaround; replace both with a single bimap.
DataTypeMap _data_type_map;
InvertedDataTypeMap _invert_data_type_map;
std::string _empty_string;
};
} // namespace doris::vectorized
2 changes: 1 addition & 1 deletion be/src/vec/data_types/data_type_struct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ MutableColumnPtr DataTypeStruct::create_column() const {

Field DataTypeStruct::get_default() const {
size_t size = elems.size();
Tuple t(size);
Tuple t;
for (size_t i = 0; i < size; ++i) {
t.push_back(elems[i]->get_default());
}
Expand Down
11 changes: 8 additions & 3 deletions be/src/vec/exec/vjdbc_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -643,9 +643,14 @@ Status JdbcConnector::_register_func_id(JNIEnv* env) {
Status JdbcConnector::_cast_string_to_array(const SlotDescriptor* slot_desc, Block* block,
int column_index, int rows) {
DataTypePtr _target_data_type = slot_desc->get_data_type_ptr();
std::string _target_data_type_name = DataTypeFactory::instance().get(_target_data_type);
DataTypePtr _cast_param_data_type = std::make_shared<DataTypeString>();
ColumnPtr _cast_param = _cast_param_data_type->create_column_const(1, _target_data_type_name);
std::string _target_data_type_name = _target_data_type->get_name();
DataTypePtr _cast_param_data_type = std::make_shared<DataTypeInt16>();
ColumnPtr _cast_param = _cast_param_data_type->create_column_const(
1, static_cast<int16_t>(_target_data_type->is_nullable()
? ((DataTypeNullable*)(_target_data_type.get()))
->get_nested_type()
->get_type_id()
: _target_data_type->get_type_id()));

ColumnsWithTypeAndName argument_template;
argument_template.reserve(2);
Expand Down
Loading