Skip to content
Merged
17 changes: 10 additions & 7 deletions cpp/src/parquet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,12 @@ set(PARQUET_STATIC_TEST_LINK_LIBS ${PARQUET_MIN_TEST_LIBS} parquet_static thrift

#
# Generated Thrift sources
set_source_files_properties(src/generated/parquet_types.cpp src/generated/parquet_types.h
src/generated/parquet_constants.cpp
src/generated/parquet_constants.h
# Directory holding the generated Thrift sources.
# Deliberately has no trailing slash: the uses of this variable append
# "/<file>", which would otherwise yield ".../generated//<file>".
set(PARQUET_THRIFT_SOURCE_DIR "${ARROW_SOURCE_DIR}/src/generated")

# Exclude the generated Thrift sources and headers from precompiled headers
# and from unity-build inclusion.
set_source_files_properties("${PARQUET_THRIFT_SOURCE_DIR}/parquet_types.cpp"
"${PARQUET_THRIFT_SOURCE_DIR}/parquet_types.h"
"${PARQUET_THRIFT_SOURCE_DIR}/parquet_constants.cpp"
"${PARQUET_THRIFT_SOURCE_DIR}/parquet_constants.h"
PROPERTIES SKIP_PRECOMPILE_HEADERS ON
SKIP_UNITY_BUILD_INCLUSION ON)

Expand Down Expand Up @@ -167,8 +170,8 @@ set(PARQUET_SRCS
metadata.cc
xxhasher.cc
page_index.cc
"${ARROW_SOURCE_DIR}/src/generated/parquet_constants.cpp"
"${ARROW_SOURCE_DIR}/src/generated/parquet_types.cpp"
"${PARQUET_THRIFT_SOURCE_DIR}/parquet_constants.cpp"
"${PARQUET_THRIFT_SOURCE_DIR}/parquet_types.cpp"
platform.cc
printer.cc
properties.cc
Expand Down Expand Up @@ -277,8 +280,8 @@ add_arrow_lib(parquet

if(WIN32 AND NOT (ARROW_TEST_LINKAGE STREQUAL "static"))
add_library(parquet_test_support STATIC
"${ARROW_SOURCE_DIR}/src/generated/parquet_constants.cpp"
"${ARROW_SOURCE_DIR}/src/generated/parquet_types.cpp")
"${PARQUET_THRIFT_SOURCE_DIR}/parquet_constants.cpp"
"${PARQUET_THRIFT_SOURCE_DIR}/parquet_types.cpp")
target_link_libraries(parquet_test_support thrift::thrift)
set(PARQUET_SHARED_TEST_LINK_LIBS ${PARQUET_SHARED_TEST_LINK_LIBS} parquet_test_support)
set(PARQUET_LIBRARIES ${PARQUET_LIBRARIES} parquet_test_support)
Expand Down
23 changes: 22 additions & 1 deletion cpp/src/parquet/file_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "parquet/exception.h"
#include "parquet/file_writer.h"
#include "parquet/metadata.h"
#include "parquet/page_index.h"
#include "parquet/platform.h"
#include "parquet/properties.h"
#include "parquet/schema.h"
Expand Down Expand Up @@ -302,6 +303,22 @@ class SerializedFile : public ParquetFileReader::Contents {

// Returns the file metadata currently stored in this object (shared ownership).
std::shared_ptr<FileMetaData> metadata() const override { return file_metadata_; }

// Returns the page index reader for this file. The reader is created on the
// first call and the same cached instance is handed back on later calls.
//
// Throws ParquetException if the file metadata has not been set.
std::shared_ptr<PageIndexReader> GetPageIndexReader() override {
  if (file_metadata_ == nullptr) {
    // Not expected when the ParquetFileReader was created through one of the
    // static Open() functions. It can occur when the constructor is used
    // directly and GetPageIndexReader() is called before Open().
    throw ParquetException(
        "Cannot call GetPageIndexReader() due to missing file metadata. Did you "
        "forget to call ParquetFileReader::Open() first?");
  }
  if (page_index_reader_ != nullptr) {
    // Already built by an earlier call; reuse it.
    return page_index_reader_;
  }
  page_index_reader_ = PageIndexReader::Make(source_.get(), file_metadata_,
                                             properties_, file_decryptor_);
  return page_index_reader_;
}

// Replaces the stored file metadata with `metadata`.
// The argument is taken by value and moved into the member.
void set_metadata(std::shared_ptr<FileMetaData> metadata) {
file_metadata_ = std::move(metadata);
}
Expand Down Expand Up @@ -522,7 +539,7 @@ class SerializedFile : public ParquetFileReader::Contents {
int64_t source_size_;
std::shared_ptr<FileMetaData> file_metadata_;
ReaderProperties properties_;

// Created on the first GetPageIndexReader() call and reused afterwards.
std::shared_ptr<PageIndexReader> page_index_reader_;
std::shared_ptr<InternalFileDecryptor> file_decryptor_;

// \return The true length of the metadata in bytes
Expand Down Expand Up @@ -784,6 +801,10 @@ std::shared_ptr<FileMetaData> ParquetFileReader::metadata() const {
return contents_->metadata();
}

// Forwards to the underlying contents object and returns its page index reader.
std::shared_ptr<PageIndexReader> ParquetFileReader::GetPageIndexReader() {
  auto page_index_reader = contents_->GetPageIndexReader();
  return page_index_reader;
}

std::shared_ptr<RowGroupReader> ParquetFileReader::RowGroup(int i) {
if (i >= metadata()->num_row_groups()) {
std::stringstream ss;
Expand Down
13 changes: 13 additions & 0 deletions cpp/src/parquet/file_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ namespace parquet {

class ColumnReader;
class FileMetaData;
class PageIndexReader;
class PageReader;
class RowGroupMetaData;

Expand Down Expand Up @@ -98,6 +99,7 @@ class PARQUET_EXPORT ParquetFileReader {
virtual void Close() = 0;
virtual std::shared_ptr<RowGroupReader> GetRowGroup(int i) = 0;
virtual std::shared_ptr<FileMetaData> metadata() const = 0;
// Returns the reader used to access the file's page index.
// Pure virtual: supplied by the concrete implementation.
virtual std::shared_ptr<PageIndexReader> GetPageIndexReader() = 0;
};

ParquetFileReader();
Expand Down Expand Up @@ -133,6 +135,17 @@ class PARQUET_EXPORT ParquetFileReader {
// Returns the file metadata. Only one instance is ever created
std::shared_ptr<FileMetaData> metadata() const;

/// Returns the PageIndexReader. Only one instance is ever created.
///
/// If the file does not have a page index, nullptr may be returned.
/// However, because checking whether the file contains a page index has a
/// cost, a non-null value may be returned even if no page index exists.
/// It is the caller's responsibility to check the return value and the
/// results of follow-up calls to the PageIndexReader.
///
/// WARNING: The returned PageIndexReader must not outlive the ParquetFileReader.
std::shared_ptr<PageIndexReader> GetPageIndexReader();

/// Pre-buffer the specified column indices in all row groups.
///
/// Readers can optionally call this to cache the necessary slices
Expand Down
Loading