diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 496468277c96..0f8b9e73f9e6 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1024,6 +1024,16 @@ static ColumnWithTypeAndName readNonNullableColumnFromArrowColumn( auto tmp_lc_column = lc_type->createColumn(); auto tmp_dict_column = IColumn::mutate(assert_cast(tmp_lc_column.get())->getDictionaryPtr()); dynamic_cast(tmp_dict_column.get())->uniqueInsertRangeFrom(*dict_column.column, 0, dict_column.column->size()); + size_t expected_dictionary_size = dict_column.column->size() + (dict_info.default_value_index == -1) + is_lc_nullable; + if (tmp_dict_column->size() != expected_dictionary_size) + { + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Expected Dictionary size {}, real Dictionary size is {}. The discrepancy probably caused by duplicated values", + expected_dictionary_size, + tmp_dict_column->size()); + } + dict_column.column = std::move(tmp_dict_column); dict_info.values = std::make_shared(std::move(dict_column)); dict_info.dictionary_size = arrow_dict_column->length(); diff --git a/tests/queries/0_stateless/02904_arrow_dictionary_indexes.sh b/tests/queries/0_stateless/02904_arrow_dictionary_indexes.sh index 3335008c120b..a7530727daeb 100755 --- a/tests/queries/0_stateless/02904_arrow_dictionary_indexes.sh +++ b/tests/queries/0_stateless/02904_arrow_dictionary_indexes.sh @@ -18,3 +18,4 @@ $CLICKHOUSE_LOCAL -q "select uniqExact(a) from file('$CLICKHOUSE_TMP/$CLICKHOUSE $CLICKHOUSE_LOCAL -q "select * from file('$CUR_DIR/data_arrow/different_dicts.arrowstream') order by x" +$CLICKHOUSE_LOCAL -q "select * from file('$CUR_DIR/data_arrow/non_unique_dict.arrowstream') -- { serverError INCORRECT_DATA }" diff --git a/tests/queries/0_stateless/data_arrow/non_unique_dict.arrowstream b/tests/queries/0_stateless/data_arrow/non_unique_dict.arrowstream new file mode 100644 index 000000000000..60f8f303ae54 Binary files /dev/null and b/tests/queries/0_stateless/data_arrow/non_unique_dict.arrowstream differ