From 9341f9d98e61aac19e87184ba15811335952c4bd Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 9 Aug 2023 21:02:53 +0800 Subject: [PATCH 1/3] update --- .../format/parquet/vparquet_column_reader.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index 7499a636759361..8a9707f72510d7 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -123,11 +123,6 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, size_t max_buf_size) { if (field->type.type == TYPE_ARRAY) { std::unique_ptr element_reader; - if (field->children[0].type.type == TYPE_MAP || - field->children[0].type.type == TYPE_STRUCT) { - return Status::InternalError( - "Array does not support nested map/struct type in column {}", field->name); - } RETURN_IF_ERROR(create(file, &field->children[0], row_group, row_ranges, ctz, io_ctx, element_reader, max_buf_size)); element_reader->set_nested_column(); @@ -135,13 +130,6 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, RETURN_IF_ERROR(array_reader->init(std::move(element_reader), field)); reader.reset(array_reader); } else if (field->type.type == TYPE_MAP) { - auto key_type = field->children[0].children[0].type.type; - auto value_type = field->children[0].children[1].type.type; - if (key_type == TYPE_ARRAY || key_type == TYPE_MAP || key_type == TYPE_STRUCT || - value_type == TYPE_ARRAY || value_type == TYPE_MAP || value_type == TYPE_STRUCT) { - return Status::InternalError("Map does not support nested complex type in column {}", - field->name); - } std::unique_ptr key_reader; std::unique_ptr value_reader; RETURN_IF_ERROR(create(file, &field->children[0].children[0], row_group, row_ranges, ctz, @@ -156,11 +144,6 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, } else if (field->type.type == TYPE_STRUCT) { std::vector> child_readers; for (int i = 0; i < field->children.size(); ++i) { - auto child_type = field->children[i].type.type; - if (child_type == TYPE_ARRAY || child_type == TYPE_MAP || child_type == TYPE_STRUCT) { - return Status::InternalError( - "Struct does not support nested complex type in column {}", field->name); - } std::unique_ptr child_reader; RETURN_IF_ERROR(create(file, &field->children[i], row_group, row_ranges, ctz, io_ctx, child_reader, max_buf_size)); From 84517fd8bb1c2062af28ebce5339644e56943d53 Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 10 Aug 2023 10:17:48 +0800 Subject: [PATCH 2/3] fix with unique ptr --- .../exec/format/parquet/vparquet_column_reader.cpp | 13 +++++-------- .../exec/format/parquet/vparquet_column_reader.h | 4 ++++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index 8a9707f72510d7..27e1723287c877 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -126,9 +126,9 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, RETURN_IF_ERROR(create(file, &field->children[0], row_group, row_ranges, ctz, io_ctx, element_reader, max_buf_size)); element_reader->set_nested_column(); - ArrayColumnReader* array_reader = new ArrayColumnReader(row_ranges, ctz, io_ctx); + auto array_reader = ArrayColumnReader::create_unique(row_ranges, ctz, io_ctx); RETURN_IF_ERROR(array_reader->init(std::move(element_reader), field)); - reader.reset(array_reader); + reader.reset(); } else if (field->type.type == TYPE_MAP) { std::unique_ptr key_reader; std::unique_ptr value_reader; @@ -138,9 +138,8 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, io_ctx, value_reader, max_buf_size)); key_reader->set_nested_column(); value_reader->set_nested_column(); - MapColumnReader* map_reader = new MapColumnReader(row_ranges, ctz, io_ctx); + auto map_reader = MapColumnReader::create_unique(row_ranges, ctz, io_ctx); RETURN_IF_ERROR(map_reader->init(std::move(key_reader), std::move(value_reader), field)); - reader.reset(map_reader); } else if (field->type.type == TYPE_STRUCT) { std::vector> child_readers; for (int i = 0; i < field->children.size(); ++i) { @@ -150,14 +149,12 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, child_reader->set_nested_column(); child_readers.emplace_back(std::move(child_reader)); } - StructColumnReader* struct_reader = new StructColumnReader(row_ranges, ctz, io_ctx); + auto struct_reader = StructColumnReader::create_unique(row_ranges, ctz, io_ctx); RETURN_IF_ERROR(struct_reader->init(std::move(child_readers), field)); - reader.reset(struct_reader); } else { const tparquet::ColumnChunk& chunk = row_group.columns[field->physical_column_index]; - ScalarColumnReader* scalar_reader = new ScalarColumnReader(row_ranges, chunk, ctz, io_ctx); + auto scalar_reader = ScalarColumnReader::create_unique(row_ranges, chunk, ctz, io_ctx); RETURN_IF_ERROR(scalar_reader->init(file, field, max_buf_size)); - reader.reset(scalar_reader); } return Status::OK(); } diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.h b/be/src/vec/exec/format/parquet/vparquet_column_reader.h index f60958b252e680..6d0e2e3f6e8b62 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.h @@ -156,6 +156,7 @@ class ParquetColumnReader { }; class ScalarColumnReader : public ParquetColumnReader { + ENABLE_FACTORY_CREATOR(ScalarColumnReader) public: ScalarColumnReader(const std::vector& row_ranges, const tparquet::ColumnChunk& chunk_meta, cctz::time_zone* ctz, @@ -195,6 +196,7 @@ class ScalarColumnReader : public ParquetColumnReader { }; class ArrayColumnReader : public ParquetColumnReader { + ENABLE_FACTORY_CREATOR(ArrayColumnReader) public: ArrayColumnReader(const std::vector& row_ranges, cctz::time_zone* ctz, io::IOContext* io_ctx) @@ -218,6 +220,7 @@ class ArrayColumnReader : public ParquetColumnReader { }; class MapColumnReader : public ParquetColumnReader { + ENABLE_FACTORY_CREATOR(MapColumnReader) public: MapColumnReader(const std::vector& row_ranges, cctz::time_zone* ctz, io::IOContext* io_ctx) @@ -252,6 +255,7 @@ class MapColumnReader : public ParquetColumnReader { }; class StructColumnReader : public ParquetColumnReader { + ENABLE_FACTORY_CREATOR(StructColumnReader) public: StructColumnReader(const std::vector& row_ranges, cctz::time_zone* ctz, io::IOContext* io_ctx) From 22ac834820eab1f6ae1055695139dd32972882ac Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 10 Aug 2023 11:17:16 +0800 Subject: [PATCH 3/3] fixed --- be/src/vec/exec/format/parquet/vparquet_column_reader.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index 27e1723287c877..0a688f6ed32ede 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -128,7 +128,7 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, element_reader->set_nested_column(); auto array_reader = ArrayColumnReader::create_unique(row_ranges, ctz, io_ctx); RETURN_IF_ERROR(array_reader->init(std::move(element_reader), field)); - reader.reset(); + reader.reset(array_reader.release()); } else if (field->type.type == TYPE_MAP) { std::unique_ptr key_reader; std::unique_ptr value_reader; @@ -140,8 +140,10 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, value_reader->set_nested_column(); auto map_reader = MapColumnReader::create_unique(row_ranges, ctz, io_ctx); RETURN_IF_ERROR(map_reader->init(std::move(key_reader), std::move(value_reader), field)); + reader.reset(map_reader.release()); } else if (field->type.type == TYPE_STRUCT) { std::vector> child_readers; + child_readers.reserve(field->children.size()); for (int i = 0; i < field->children.size(); ++i) { std::unique_ptr child_reader; RETURN_IF_ERROR(create(file, &field->children[i], row_group, row_ranges, ctz, io_ctx, @@ -151,10 +153,12 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, } auto struct_reader = StructColumnReader::create_unique(row_ranges, ctz, io_ctx); RETURN_IF_ERROR(struct_reader->init(std::move(child_readers), field)); + reader.reset(struct_reader.release()); } else { const tparquet::ColumnChunk& chunk = row_group.columns[field->physical_column_index]; auto scalar_reader = ScalarColumnReader::create_unique(row_ranges, chunk, ctz, io_ctx); RETURN_IF_ERROR(scalar_reader->init(file, field, max_buf_size)); + reader.reset(scalar_reader.release()); } return Status::OK(); }