diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index bdfd0fe07dc..6585d286d1a 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -3443,8 +3443,8 @@ TEST(ArrowReadWrite, Decimal256AsInt) { auto table = ::arrow::Table::Make(::arrow::schema({field("root", type)}), {array}); parquet::WriterProperties::Builder builder; - // Enforce integer type to annotate decimal type - auto writer_properties = builder.enable_integer_annotate_decimal()->build(); + // Allow small decimals to be stored as int32 or int64. + auto writer_properties = builder.enable_store_decimal_as_integer()->build(); auto props_store_schema = ArrowWriterProperties::Builder().store_schema()->build(); CheckConfiguredRoundtrip(table, table, writer_properties, props_store_schema); @@ -4821,8 +4821,8 @@ class TestIntegerAnnotateDecimalTypeParquetIO : public TestParquetIO { auto arrow_schema = ::arrow::schema({::arrow::field("a", values->type())}); parquet::WriterProperties::Builder builder; - // Enforce integer type to annotate decimal type - auto writer_properties = builder.enable_integer_annotate_decimal()->build(); + // Allow small decimals to be stored as int32 or int64. + auto writer_properties = builder.enable_store_decimal_as_integer()->build(); std::shared_ptr parquet_schema; ASSERT_OK_NO_THROW(ToParquetSchema(arrow_schema.get(), *writer_properties, *default_arrow_writer_properties(), diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index 267b892e4b4..07d481ba8e2 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -357,7 +357,7 @@ Status FieldToNode(const std::string& name, const std::shared_ptr& field, const auto& decimal_type = static_cast(*field->type()); precision = decimal_type.precision(); scale = decimal_type.scale(); - if (properties.integer_annotate_decimal() && 1 <= precision && precision <= 18) { + if (properties.store_decimal_as_integer() && 1 <= precision && precision <= 18) { type = precision <= 9 ? ParquetType ::INT32 : ParquetType ::INT64; } else { type = ParquetType::FIXED_LEN_BYTE_ARRAY; diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h index d45dcfe69fc..ef4ae124a29 100644 --- a/cpp/src/parquet/properties.h +++ b/cpp/src/parquet/properties.h @@ -201,7 +201,7 @@ class PARQUET_EXPORT WriterProperties { version_(ParquetVersion::PARQUET_2_4), data_page_version_(ParquetDataPageVersion::V1), created_by_(DEFAULT_CREATED_BY), - integer_annotate_decimal_(false) {} + store_decimal_as_integer_(false) {} virtual ~Builder() {} /// Specify the memory pool for the writer. Default default_memory_pool. @@ -452,19 +452,35 @@ class PARQUET_EXPORT WriterProperties { return this->disable_statistics(path->ToDotString()); } - /// Enable integer type to annotate decimal type as below: - /// int32: 1 <= precision <= 9 - /// int64: 10 <= precision <= 18 - /// Default disabled. - Builder* enable_integer_annotate_decimal() { - integer_annotate_decimal_ = true; + /// Allow decimals with 1 <= precision <= 18 to be stored as integers. + /// + /// In Parquet, DECIMAL can be stored in any of the following physical types: + /// - int32: for 1 <= precision <= 9. + /// - int64: for 10 <= precision <= 18. + /// - fixed_len_byte_array: precision is limited by the array size. + /// Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits. + /// - binary: precision is unlimited. The minimum number of bytes to store + /// the unscaled value is used. + /// + /// By default, this is DISABLED and all decimal types annotate fixed_len_byte_array. + /// + /// When enabled, the C++ writer will use following physical types to store decimals: + /// - int32: for 1 <= precision <= 9. + /// - int64: for 10 <= precision <= 18. + /// - fixed_len_byte_array: for precision > 18. + /// + /// As a consequence, decimal columns stored in integer types are more compact. + Builder* enable_store_decimal_as_integer() { + store_decimal_as_integer_ = true; return this; } - /// Disable integer type to annotate decimal type. + /// Disable decimal logical type with 1 <= precision <= 18 to be stored as + /// integer physical type. + /// /// Default disabled. - Builder* disable_integer_annotate_decimal() { - integer_annotate_decimal_ = false; + Builder* disable_store_decimal_as_integer() { + store_decimal_as_integer_ = false; return this; } @@ -493,7 +509,7 @@ class PARQUET_EXPORT WriterProperties { pool_, dictionary_pagesize_limit_, write_batch_size_, max_row_group_length_, pagesize_, version_, created_by_, std::move(file_encryption_properties_), default_column_properties_, column_properties, data_page_version_, - integer_annotate_decimal_)); + store_decimal_as_integer_)); } private: @@ -505,7 +521,7 @@ class PARQUET_EXPORT WriterProperties { ParquetVersion::type version_; ParquetDataPageVersion data_page_version_; std::string created_by_; - bool integer_annotate_decimal_; + bool store_decimal_as_integer_; std::shared_ptr file_encryption_properties_; @@ -536,7 +552,7 @@ class PARQUET_EXPORT WriterProperties { inline std::string created_by() const { return parquet_created_by_; } - inline bool integer_annotate_decimal() const { return integer_annotate_decimal_; } + inline bool store_decimal_as_integer() const { return store_decimal_as_integer_; } inline Encoding::type dictionary_index_encoding() const { if (parquet_version_ == ParquetVersion::PARQUET_1_0) { @@ -606,7 +622,7 @@ class PARQUET_EXPORT WriterProperties { std::shared_ptr file_encryption_properties, const ColumnProperties& default_column_properties, const std::unordered_map& column_properties, - ParquetDataPageVersion data_page_version, bool integer_annotate_decimal) + ParquetDataPageVersion data_page_version, bool store_short_decimal_as_integer) : pool_(pool), dictionary_pagesize_limit_(dictionary_pagesize_limit), write_batch_size_(write_batch_size), @@ -615,7 +631,7 @@ class PARQUET_EXPORT WriterProperties { parquet_data_page_version_(data_page_version), parquet_version_(version), parquet_created_by_(created_by), - integer_annotate_decimal_(integer_annotate_decimal), + store_decimal_as_integer_(store_short_decimal_as_integer), file_encryption_properties_(file_encryption_properties), default_column_properties_(default_column_properties), column_properties_(column_properties) {} @@ -628,7 +644,7 @@ class PARQUET_EXPORT WriterProperties { ParquetDataPageVersion parquet_data_page_version_; ParquetVersion::type parquet_version_; std::string parquet_created_by_; - bool integer_annotate_decimal_; + bool store_decimal_as_integer_; std::shared_ptr file_encryption_properties_;