diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index dd0b19c2ce0..bccdc2b372a 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -649,22 +649,24 @@ class ParquetIOTestBase : public ::testing::Test { AssertArraysEqual(values, *out); } - void ReadTableFromFile(std::unique_ptr reader, bool expect_metadata, + void ReadTableFromFile(std::unique_ptr reader, bool expect_schema, std::shared_ptr* out) { ASSERT_OK_NO_THROW(reader->ReadTable(out)); auto key_value_metadata = reader->parquet_reader()->metadata()->key_value_metadata().get(); - if (!expect_metadata) { - ASSERT_EQ(nullptr, key_value_metadata); + if (!expect_schema) { + ASSERT_TRUE(key_value_metadata == nullptr || + !key_value_metadata->Contains("ARROW:schema")); } else { ASSERT_NE(nullptr, key_value_metadata); + ASSERT_TRUE(key_value_metadata->Contains("ARROW:schema")); } ASSERT_NE(nullptr, out->get()); } void ReadTableFromFile(std::unique_ptr reader, std::shared_ptr
* out) { - ReadTableFromFile(std::move(reader), /*expect_metadata=*/false, out); + ReadTableFromFile(std::move(reader), /*expect_schema=*/false, out); } void RoundTripSingleColumn( @@ -680,9 +682,9 @@ class ParquetIOTestBase : public ::testing::Test { std::shared_ptr
out; std::unique_ptr reader; ASSERT_NO_FATAL_FAILURE(this->ReaderFromSink(&reader)); - const bool expect_metadata = arrow_properties->store_schema(); + const bool expect_schema = arrow_properties->store_schema(); ASSERT_NO_FATAL_FAILURE( - this->ReadTableFromFile(std::move(reader), expect_metadata, &out)); + this->ReadTableFromFile(std::move(reader), expect_schema, &out)); ASSERT_EQ(1, out->num_columns()); ASSERT_EQ(table->num_rows(), out->num_rows()); @@ -5343,6 +5345,27 @@ TEST(TestArrowReadWrite, OperationsOnClosedWriter) { ASSERT_RAISES(Invalid, writer->WriteTable(*table, 1)); } +TEST(TestArrowReadWrite, TableWithMetadata) { + auto values = std::make_shared(::arrow::ArrayVector{}, ::arrow::int32()); + auto table = + MakeSimpleTable(values, false, ::arrow::KeyValueMetadata::Make({"foo"}, {"bar"})); + + auto sink = CreateOutputStream(); + ASSERT_OK_NO_THROW( + WriteTable(*table, ::arrow::default_memory_pool(), sink, SMALL_SIZE)); + ASSERT_OK_AND_ASSIGN(auto buffer, sink->Finish()); + + std::shared_ptr
out; + std::unique_ptr reader; + ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), + ::arrow::default_memory_pool(), &reader)); + ASSERT_OK_NO_THROW(reader->ReadTable(&out)); + + ASSERT_NE(nullptr, out->schema()->metadata()); + ASSERT_TRUE(out->schema()->metadata()->Contains("foo")); + ASSERT_EQ("bar", out->schema()->metadata()->Get("foo")); +} + namespace { struct ColumnIndexObject { diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h index b2be1b3c535..50336d39871 100644 --- a/cpp/src/parquet/arrow/test_util.h +++ b/cpp/src/parquet/arrow/test_util.h @@ -479,15 +479,18 @@ Status MakeEmptyListsArray(int64_t size, std::shared_ptr* out_array) { } std::shared_ptr<::arrow::Table> MakeSimpleTable( - const std::shared_ptr& values, bool nullable) { - auto schema = ::arrow::schema({::arrow::field("col", values->type(), nullable)}); + const std::shared_ptr& values, bool nullable, + std::shared_ptr<::arrow::KeyValueMetadata> metadata = nullptr) { + auto schema = ::arrow::schema({::arrow::field("col", values->type(), nullable)}, + std::move(metadata)); return ::arrow::Table::Make(schema, {values}); } -std::shared_ptr<::arrow::Table> MakeSimpleTable(const std::shared_ptr& values, - bool nullable) { +std::shared_ptr<::arrow::Table> MakeSimpleTable( + const std::shared_ptr& values, bool nullable, + std::shared_ptr<::arrow::KeyValueMetadata> metadata = nullptr) { auto carr = std::make_shared<::arrow::ChunkedArray>(values); - return MakeSimpleTable(carr, nullable); + return MakeSimpleTable(carr, nullable, std::move(metadata)); } template diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc index 5238986c428..72437c9e62f 100644 --- a/cpp/src/parquet/arrow/writer.cc +++ b/cpp/src/parquet/arrow/writer.cc @@ -529,7 +529,12 @@ Status GetSchemaMetadata(const ::arrow::Schema& schema, ::arrow::MemoryPool* poo const ArrowWriterProperties& properties, std::shared_ptr* out) { if (!properties.store_schema()) { - *out = nullptr; + if (schema.metadata()) { + *out = schema.metadata()->Copy(); + } else { + // No metadata to propagate + *out = nullptr; + } return Status::OK(); }