Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 1 addition & 44 deletions c_glib/parquet-glib/arrow-file-reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,15 +231,8 @@ gparquet_arrow_file_reader_get_schema(GParquetArrowFileReader *reader,
{
auto parquet_arrow_file_reader = gparquet_arrow_file_reader_get_raw(reader);

const auto n_columns =
parquet_arrow_file_reader->parquet_reader()->metadata()->num_columns();
std::vector<int> indices(n_columns);
for (int i = 0; i < n_columns; ++i) {
indices[i] = i;
}

std::shared_ptr<arrow::Schema> arrow_schema;
auto status = parquet_arrow_file_reader->GetSchema(indices, &arrow_schema);
auto status = parquet_arrow_file_reader->GetSchema(&arrow_schema);
if (garrow_error_check(error,
status,
"[parquet][arrow][file-reader][get-schema]")) {
Expand All @@ -249,42 +242,6 @@ gparquet_arrow_file_reader_get_schema(GParquetArrowFileReader *reader,
}
}

/**
 * gparquet_arrow_file_reader_select_schema:
 * @reader: A #GParquetArrowFileReader.
 * @column_indexes: (array length=n_column_indexes):
 *   The array of column indexes to be selected.
 * @n_column_indexes: The length of `column_indexes`.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Returns: (transfer full) (nullable): A selected #GArrowSchema.
 *
 * Since: 0.12.0
 */
GArrowSchema *
gparquet_arrow_file_reader_select_schema(GParquetArrowFileReader *reader,
                                         gint *column_indexes,
                                         gsize n_column_indexes,
                                         GError **error)
{
  auto raw_reader = gparquet_arrow_file_reader_get_raw(reader);

  // Copy the caller's C array into the std::vector<int> passed to GetSchema().
  std::vector<int> selected_columns(column_indexes,
                                    column_indexes + n_column_indexes);

  std::shared_ptr<arrow::Schema> selected_schema;
  auto status = raw_reader->GetSchema(selected_columns, &selected_schema);

  // garrow_error_check() yields a false value when status is an error;
  // in that case no schema is returned.
  if (!garrow_error_check(error,
                          status,
                          "[parquet][arrow][file-reader][select-schema]")) {
    return NULL;
  }
  return garrow_schema_new_raw(&selected_schema);
}

/**
* gparquet_arrow_file_reader_read_column_data:
* @reader: A #GParquetArrowFileReader.
Expand Down
5 changes: 0 additions & 5 deletions c_glib/parquet-glib/arrow-file-reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,6 @@ gparquet_arrow_file_reader_read_table(GParquetArrowFileReader *reader,
GArrowSchema *
gparquet_arrow_file_reader_get_schema(GParquetArrowFileReader *reader,
GError **error);
GArrowSchema *
gparquet_arrow_file_reader_select_schema(GParquetArrowFileReader *reader,
gint *column_indexes,
gsize n_column_indexes,
GError **error);

GARROW_AVAILABLE_IN_1_0
GArrowChunkedArray *
Expand Down
13 changes: 0 additions & 13 deletions c_glib/test/parquet/test-arrow-file-reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,6 @@ def test_schema
SCHEMA
end

# Checks the string form of the schema returned by @reader.select_schema
# for the column index lists [0], [1] and [0, 1].
def test_select_schema
assert_equal(<<-SCHEMA.chomp, @reader.select_schema([0]).to_s)
a: string
SCHEMA
assert_equal(<<-SCHEMA.chomp, @reader.select_schema([1]).to_s)
b: int32
SCHEMA
assert_equal(<<-SCHEMA.chomp, @reader.select_schema([0, 1]).to_s)
a: string
b: int32
SCHEMA
end

def test_read_column
assert_equal([
Arrow::ChunkedArray.new([@a_array]),
Expand Down
52 changes: 34 additions & 18 deletions cpp/src/parquet/arrow/arrow-reader-writer-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "parquet/api/writer.h"

#include "parquet/arrow/reader.h"
#include "parquet/arrow/reader_internal.h"
#include "parquet/arrow/schema.h"
#include "parquet/arrow/test-util.h"
#include "parquet/arrow/writer.h"
Expand Down Expand Up @@ -597,12 +598,16 @@ class TestParquetIO : public ::testing::Test {
std::shared_ptr<::arrow::Schema> arrow_schema;
ArrowReaderProperties props;
ASSERT_OK_NO_THROW(FromParquetSchema(&descriptor, props, &arrow_schema));
FileWriter writer(::arrow::default_memory_pool(), MakeWriter(schema), arrow_schema);
ASSERT_OK_NO_THROW(writer.NewRowGroup(values->length()));
ASSERT_OK_NO_THROW(writer.WriteColumnChunk(*values));
ASSERT_OK_NO_THROW(writer.Close());
// writer.Close() should be idempotent
ASSERT_OK_NO_THROW(writer.Close());

std::unique_ptr<FileWriter> writer;
ASSERT_OK_NO_THROW(FileWriter::Make(::arrow::default_memory_pool(),
MakeWriter(schema), arrow_schema,
default_arrow_writer_properties(), &writer));
ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length()));
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values));
ASSERT_OK_NO_THROW(writer->Close());
// writer->Close() should be idempotent
ASSERT_OK_NO_THROW(writer->Close());
}

// Replaces sink_ with the result of a new CreateOutputStream() call.
void ResetSink() {
  sink_ = CreateOutputStream();
}
Expand Down Expand Up @@ -789,13 +794,17 @@ TYPED_TEST(TestParquetIO, SingleColumnRequiredChunkedWrite) {
std::shared_ptr<::arrow::Schema> arrow_schema;
ArrowReaderProperties props;
ASSERT_OK_NO_THROW(FromParquetSchema(&descriptor, props, &arrow_schema));
FileWriter writer(default_memory_pool(), this->MakeWriter(schema), arrow_schema);

std::unique_ptr<FileWriter> writer;
ASSERT_OK_NO_THROW(FileWriter::Make(::arrow::default_memory_pool(),
this->MakeWriter(schema), arrow_schema,
default_arrow_writer_properties(), &writer));
for (int i = 0; i < 4; i++) {
ASSERT_OK_NO_THROW(writer.NewRowGroup(chunk_size));
ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, chunk_size);
ASSERT_OK_NO_THROW(writer.WriteColumnChunk(*sliced_array));
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
}
ASSERT_OK_NO_THROW(writer.Close());
ASSERT_OK_NO_THROW(writer->Close());

ASSERT_NO_FATAL_FAILURE(this->ReadAndCheckSingleColumnFile(*values));
}
Expand Down Expand Up @@ -859,14 +868,17 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalChunkedWrite) {
std::shared_ptr<::arrow::Schema> arrow_schema;
ArrowReaderProperties props;
ASSERT_OK_NO_THROW(FromParquetSchema(&descriptor, props, &arrow_schema));
FileWriter writer(::arrow::default_memory_pool(), this->MakeWriter(schema),
arrow_schema);

std::unique_ptr<FileWriter> writer;
ASSERT_OK_NO_THROW(FileWriter::Make(::arrow::default_memory_pool(),
this->MakeWriter(schema), arrow_schema,
default_arrow_writer_properties(), &writer));
for (int i = 0; i < 4; i++) {
ASSERT_OK_NO_THROW(writer.NewRowGroup(chunk_size));
ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, chunk_size);
ASSERT_OK_NO_THROW(writer.WriteColumnChunk(*sliced_array));
ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
}
ASSERT_OK_NO_THROW(writer.Close());
ASSERT_OK_NO_THROW(writer->Close());

ASSERT_NO_FATAL_FAILURE(this->ReadAndCheckSingleColumnFile(*values));
}
Expand Down Expand Up @@ -2624,11 +2636,15 @@ TEST(TestArrowReaderAdHoc, DISABLED_LargeStringColumn) {
GroupNode::Make("schema", Repetition::REQUIRED, {schm->group_node()->field(0)}));

auto writer = ParquetFileWriter::Open(sink, schm_node);
FileWriter arrow_writer(default_memory_pool(), std::move(writer), table->schema());

std::unique_ptr<FileWriter> arrow_writer;
ASSERT_OK_NO_THROW(FileWriter::Make(::arrow::default_memory_pool(), std::move(writer),
table->schema(), default_arrow_writer_properties(),
&arrow_writer));
for (int i : {0, 1}) {
ASSERT_OK_NO_THROW(arrow_writer.WriteTable(*table, table->num_rows())) << i;
ASSERT_OK_NO_THROW(arrow_writer->WriteTable(*table, table->num_rows())) << i;
}
ASSERT_OK_NO_THROW(arrow_writer.Close());
ASSERT_OK_NO_THROW(arrow_writer->Close());

std::shared_ptr<Buffer> tables_buffer;
ASSERT_OK_NO_THROW(sink->Finish(&tables_buffer));
Expand Down
Loading