From 6a19696bc22af9ca3d40dec6a21094d5b0788c48 Mon Sep 17 00:00:00 2001 From: sgilmore Date: Tue, 11 May 2021 16:05:25 -0400 Subject: [PATCH 01/16] Update CMakeLists.txt to build against latest Arrow C++ libraries Co-authored-by: Kevin Gurney --- matlab/CMakeLists.txt | 57 +++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt index 09c8839aaf0..5e7687ceb96 100644 --- a/matlab/CMakeLists.txt +++ b/matlab/CMakeLists.txt @@ -15,7 +15,8 @@ # specific language governing permissions and limitations # under the License. -cmake_minimum_required(VERSION 3.2) +cmake_minimum_required(VERSION 3.20) + set(CMAKE_CXX_STANDARD 11) set(MLARROW_VERSION "5.0.0-SNAPSHOT") @@ -29,22 +30,42 @@ if(EXISTS "${CPP_CMAKE_MODULES}") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CPP_CMAKE_MODULES}) endif() -## Arrow is Required +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/cmake_modules) + +# Arrow is Required find_package(Arrow REQUIRED) -## MATLAB is required to be installed to build MEX interfaces -set(MATLAB_ADDITIONAL_VERSIONS "R2018a=9.4") -find_package(Matlab REQUIRED MX_LIBRARY) - -# Build featherread mex file based on the arrow shared library -matlab_add_mex(NAME featherreadmex - SRC src/featherreadmex.cc src/feather_reader.cc src/util/handle_status.cc - src/util/unicode_conversion.cc - LINK_TO ${ARROW_SHARED_LIB}) -target_include_directories(featherreadmex PRIVATE ${ARROW_INCLUDE_DIR}) - -# Build featherwrite mex file based on the arrow shared library -matlab_add_mex(NAME featherwritemex - SRC src/featherwritemex.cc src/feather_writer.cc src/util/handle_status.cc - LINK_TO ${ARROW_SHARED_LIB}) -target_include_directories(featherwritemex PRIVATE ${ARROW_INCLUDE_DIR}) +# MATLAB is Required +find_package(Matlab REQUIRED) + +# Construct the absolute path to featherread's source files +set(featherread_sources featherreadmex.cc feather_reader.cc + util/handle_status.cc util/unicode_conversion.cc) +list(TRANSFORM featherread_sources PREPEND ${CMAKE_SOURCE_DIR}/src/) + +# Build featherreadmex MEX binary +matlab_add_mex(R2018a + NAME featherreadmex + SRC ${featherread_sources} + LINK_TO arrow_shared) + +# Construct the absolute path to featherwrite's source files +set(featherwrite_sources featherwritemex.cc feather_writer.cc + util/handle_status.cc util/unicode_conversion.cc) +list(TRANSFORM featherwrite_sources PREPEND ${CMAKE_SOURCE_DIR}/src/) + + +# Build featherwritemex MEX binary +matlab_add_mex(R2018a + NAME featherwritemex + SRC ${featherwrite_sources} + LINK_TO arrow_shared) + +# Ensure the MEX binaries are placed in the src directory on all platforms +if (WIN32) + set_target_properties(featherreadmex PROPERTIES RUNTIME_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) + set_target_properties(featherwritemex PROPERTIES RUNTIME_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) +else() + set_target_properties(featherreadmex PROPERTIES LIBRARY_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) + set_target_properties(featherwritemex PROPERTIES LIBRARY_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) +endif() From 724e6acf1320317ca5537b0f48feee57140d6689 Mon Sep 17 00:00:00 2001 From: sgilmore Date: Tue, 11 May 2021 16:09:02 -0400 Subject: [PATCH 02/16] Update featherwrite C++ code to use the latest Arrow C++ APIs Co-authored-by: Kevin Gurney --- matlab/src/feather_writer.cc | 153 +++++++++++++++++++++------------- matlab/src/feather_writer.h | 29 +++---- matlab/src/featherwritemex.cc | 7 +- 3 files changed, 111 insertions(+), 78 deletions(-) diff --git a/matlab/src/feather_writer.cc b/matlab/src/feather_writer.cc index bd1576bca46..a2f4e13dcf5 100644 --- a/matlab/src/feather_writer.cc +++ b/matlab/src/feather_writer.cc @@ -15,9 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include -#include /* for std::multiplies */ -#include /* for std::accumulate */ +#include "feather_writer.h" #include #include @@ -26,11 +24,15 @@ #include #include #include -#include - +#include +#include +#include #include -#include "feather_writer.h" +#include +#include /* for std::multiplies */ +#include /* for std::accumulate */ + #include "matlab_traits.h" #include "util/handle_status.h" @@ -38,6 +40,37 @@ namespace arrow { namespace matlab { namespace internal { +// Returns the arrow::DataType that corresponds to the input type string +std::shared_ptr ConvertMatlabTypeStringToArrowDataType( + const std::string& t) { + if (t == "double") { + return arrow::float64(); + } else if (t == "single") { + return arrow::float32(); + } else if (t == "uint64") { + return arrow::uint64(); + } else if (t == "uint32") { + return arrow::uint32(); + } else if (t == "uint16") { + return arrow::uint16(); + } else if (t == "uint8") { + return arrow::uint8(); + } else if (t == "int64") { + return arrow::int64(); + } else if (t == "int32") { + return arrow::int32(); + } else if (t == "int16") { + return arrow::int16(); + } else if (t == "int8") { + return arrow::int8(); + } + mexErrMsgIdAndTxt("MATLAB:arrow:UnsupportedMatlabTypeString", + "Unsupported MATLAB type string: '%s'", t.c_str()); + + // mexErrMsgIdAndTxt throws unconditionally so we should never reach this line + return nullptr; +} + // Utility that helps verify the input mxArray struct field name and type. // Returns void since any errors will throw and terminate MEX execution. void ValidateMxStructField(const mxArray* struct_array, const char* fieldname, @@ -71,8 +104,7 @@ void ValidateMxStructField(const mxArray* struct_array, const char* fieldname, mxGetClassName(field), fieldname); } } - - // Some struct fields (like the table description) can be empty, while others + // Some struct fields (like Data) can be empty, while others // (like NumRows) should never be empty. This conditional helps account for both cases. if (!can_be_empty) { // Ensure that individual mxStructArray fields are non-empty. @@ -120,7 +152,7 @@ void ValidateNumRows(int64_t actual, int64_t expected) { } // Calculate the number of bytes required in the bit-packed validity buffer. -constexpr int64_t BitPackedLength(int64_t num_elements) { +int64_t BitPackedLength(int64_t num_elements) { // Since mxLogicalArray encodes [0, 1] in a full byte, we can compress that byte // down to a bit...therefore dividing the mxLogicalArray length by 8 here. return static_cast(std::ceil(num_elements / 8.0)); @@ -134,7 +166,7 @@ size_t GetNumberOfElements(const mxArray* array) { const size_t* dimensions = mxGetDimensions(array); // Iterate over the dimensions array and accumulate the total number of elements. - return std::accumulate(dimensions, dimensions + num_dimensions, 1, + return std::accumulate(dimensions, dimensions + num_dimensions, size_t{1}, std::multiplies()); } @@ -164,7 +196,7 @@ void BitPackBuffer(const mxArray* logical_array, // Iterate over the mxLogical array and write bit-packed bools to the arrow::Buffer. // Call into a loop-unrolled Arrow utility for better performance when bit-packing. - auto generator = [&]() -> uint8_t { return *unpacked_buffer_ptr++; }; + auto generator = [&]() -> bool { return *(unpacked_buffer_ptr++); }; const int64_t start_offset = 0; arrow::internal::GenerateBitsUnrolled(packed_buffer_ptr, start_offset, unpacked_buffer_length, generator); @@ -195,8 +227,8 @@ std::unique_ptr WriteNumericData(const mxArray* data, mxGetElementSize(data) * mxGetNumberOfElements(data)); // Construct arrow::NumericArray specialization using arrow::Buffer. - // Pass in nulls information...we could compute and provide the number of nulls here too, - // but passing -1 for now so that Arrow recomputes it if necessary. + // Pass in nulls information...we could compute and provide the number of nulls here + // too, but passing -1 for now so that Arrow recomputes it if necessary. return std::unique_ptr(new NumericArray( mxGetNumberOfElements(data), buffer, validity_bitmap, -1)); } @@ -228,7 +260,6 @@ std::unique_ptr WriteVariableData(const mxArray* data, const std::string& return WriteNumericData(data, validity_bitmap); case mxINT64_CLASS: return WriteNumericData(data, validity_bitmap); - default: { mexErrMsgIdAndTxt("MATLAB:arrow:UnsupportedArrowType", "Unsupported arrow::Type '%s' for variable '%s'", @@ -248,60 +279,44 @@ Status FeatherWriter::Open(const std::string& filename, *feather_writer = std::shared_ptr(new FeatherWriter()); // Open a FileOutputStream corresponding to the provided filename. - std::shared_ptr writable_file(nullptr); - ARROW_RETURN_NOT_OK(io::FileOutputStream::Open(filename, &writable_file)); - - // TableWriter::Open expects a shared_ptr to an OutputStream. - // Open the Feather file for writing with a TableWriter. - return ipc::feather::TableWriter::Open(writable_file, - &(*feather_writer)->table_writer_); -} - -// Write table metadata to the Feather file from a mxArray*. -void FeatherWriter::WriteMetadata(const mxArray* metadata) { - // Verify that all required fieldnames are provided. - internal::ValidateMxStructField(metadata, "Description", mxCHAR_CLASS, true); - internal::ValidateMxStructField(metadata, "NumRows", mxDOUBLE_CLASS, false); - internal::ValidateMxStructField(metadata, "NumVariables", mxDOUBLE_CLASS, false); - - // Convert Description to a std::string and set on FeatherWriter and TableWriter. - std::string description = - internal::MxArrayToString(mxGetField(metadata, 0, "Description")); - this->description_ = description; - this->table_writer_->SetDescription(description); - - // Get the NumRows field in the struct array and set on TableWriter. - this->num_rows_ = static_cast(mxGetScalar(mxGetField(metadata, 0, "NumRows"))); - this->table_writer_->SetNumRows(this->num_rows_); + arrow::Result> maybe_file_output_stream = + io::FileOutputStream::Open(filename, &((*feather_writer)->file_output_stream_)); + RETURN_NOT_OK(maybe_file_output_stream); + (*feather_writer)->file_output_stream_ = maybe_file_output_stream.ValueOrDie(); - // Get the total number of variables. This is checked later for consistency with - // the provided number of columns before finishing the file write. - this->num_variables_ = - static_cast(mxGetScalar(mxGetField(metadata, 0, "NumVariables"))); + return Status::OK(); } // Write mxArrays from MATLAB into a Feather file. -Status FeatherWriter::WriteVariables(const mxArray* variables) { +Status FeatherWriter::WriteVariables(const mxArray* variables, const mxArray* metadata) { // Verify that all required fieldnames are provided. internal::ValidateMxStructField(variables, "Name", mxCHAR_CLASS, true); internal::ValidateMxStructField(variables, "Type", mxCHAR_CLASS, false); internal::ValidateMxStructField(variables, "Data", mxUNKNOWN_CLASS, true); internal::ValidateMxStructField(variables, "Valid", mxLOGICAL_CLASS, true); + // Verify that all required fieldnames are provided. + internal::ValidateMxStructField(metadata, "NumRows", mxDOUBLE_CLASS, false); + internal::ValidateMxStructField(metadata, "NumVariables", mxDOUBLE_CLASS, false); + // Get the number of columns in the struct array. size_t num_columns = internal::GetNumberOfElements(variables); + // Get the NumRows field in the struct array and set on TableWriter. + num_rows_ = static_cast(mxGetScalar(mxGetField(metadata, 0, "NumRows"))); + // Get the total number of variables. This is checked later for consistency with + // the provided number of columns before finishing the file write. + num_variables_ = + static_cast(mxGetScalar(mxGetField(metadata, 0, "NumVariables"))); + // Verify that we have all the columns required for writing // Currently we need all columns to be passed in together in the WriteVariables method. - internal::ValidateNumColumns(static_cast(num_columns), this->num_variables_); + internal::ValidateNumColumns(static_cast(num_columns), num_variables_); + + arrow::SchemaBuilder schema_builder; + std::vector> table_columns; - // Allocate a packed validity bitmap for later arrow::Buffers to reference and populate. - // Since this is defined in the enclosing scope around any arrow::Buffer usage, this - // should outlive any arrow::Buffers created on this range, thus avoiding dangling - // references. - std::shared_ptr validity_bitmap; - ARROW_RETURN_NOT_OK(AllocateResizableBuffer(internal::BitPackedLength(this->num_rows_), - &validity_bitmap)); + const int64_t bitpacked_length = internal::BitPackedLength(num_rows_); // Iterate over the input columns and generate arrow arrays. for (int idx = 0; idx < num_columns; ++idx) { @@ -316,22 +331,46 @@ Status FeatherWriter::WriteVariables(const mxArray* variables) { std::string name_str = internal::MxArrayToString(name); std::string type_str = internal::MxArrayToString(type); + std::shared_ptr datatype = + internal::ConvertMatlabTypeStringToArrowDataType(type_str); + std::shared_ptr field = + std::make_shared(name_str, datatype); + + arrow::Result> maybe_buffer = + arrow::AllocateResizableBuffer(internal::BitPackedLength(num_rows_)); + RETURN_NOT_OK(maybe_buffer); + std::shared_ptr validity_bitmap = maybe_buffer.ValueOrDie(); + // Populate bit-packed arrow::Buffer using validity data in the mxArray*. internal::BitPackBuffer(valid, validity_bitmap); // Wrap mxArray data in an arrow::Array of the equivalent type. - std::unique_ptr array = + std::shared_ptr array = internal::WriteVariableData(data, type_str, validity_bitmap); // Verify that the arrow::Array has the right number of elements. - internal::ValidateNumRows(array->length(), this->num_rows_); + internal::ValidateNumRows(array->length(), num_rows_); + + // Append the field to the schema builder + RETURN_NOT_OK(schema_builder.AddField(field)); - // Write another column to the Feather file. - ARROW_RETURN_NOT_OK(this->table_writer_->Append(name_str, *array)); + // Store the table column + table_columns.push_back(array); } + // Create the table schema + arrow::Result> table_schema_result = + schema_builder.Finish(); + RETURN_NOT_OK(table_schema_result); + + std::shared_ptr table_schema = table_schema_result.ValueOrDie(); + + // Specify the feather file format version as V1 + arrow::ipc::feather::WriteProperties write_props; + write_props.version = arrow::ipc::feather::kFeatherV1Version; + std::shared_ptr table = arrow::Table::Make(table_schema, table_columns); // Write the Feather file metadata to the end of the file. - return this->table_writer_->Finalize(); + return ipc::feather::WriteTable(*table, file_output_stream_.get(), write_props); } } // namespace matlab diff --git a/matlab/src/feather_writer.h b/matlab/src/feather_writer.h index 4b402e01e17..45fbe5294e2 100644 --- a/matlab/src/feather_writer.h +++ b/matlab/src/feather_writer.h @@ -17,15 +17,14 @@ #pragma once -#include -#include - #include #include #include - #include +#include +#include + namespace arrow { namespace matlab { @@ -33,24 +32,21 @@ class FeatherWriter { public: ~FeatherWriter() = default; - /// \brief Write Feather file metadata using information from an mxArray* struct. - /// The input mxArray must be a scalar struct array with the following fields: - /// - "Description" :: Nx1 mxChar array, table-level description - /// - "NumRows" :: scalar mxDouble array, number of rows in table - /// - "NumVariables" :: scalar mxDouble array, total number of variables - /// \param[in] metadata mxArray* scalar struct containing table-level metadata - void WriteMetadata(const mxArray* metadata); - - /// \brief Write mxArrays to a Feather file. The input must be a N-by-1 mxStruct - // array with the following fields: + /// \brief Write mxArrays to a Feather file. The first input must be a N-by-1 mxStruct + /// array with the following fields: /// - "Name" :: Nx1 mxChar array, name of the column /// - "Type" :: Nx1 mxChar array, the variable's MATLAB datatype /// - "Data" :: Nx1 mxArray, data for this variable /// - "Valid" :: Nx1 mxLogical array, 0 represents invalid (null) values and /// 1 represents valid (non-null) values + /// The second input must be a scalar mxStruct with the following + /// fields: + /// - "NumRows" :: scalar mxDouble array, number of rows in table + /// - "NumVariables" :: scalar mxDouble array, total number of variables /// \param[in] variables mxArray* struct array containing table variable data + /// \param[in] metadata mxArray* scalar struct containing table-level metadata /// \return status - Status WriteVariables(const mxArray* variables); + Status WriteVariables(const mxArray* variables, const mxArray* metadata); /// \brief Initialize a FeatherWriter object that writes to a Feather file /// \param[in] filename path to the new Feather file @@ -62,12 +58,11 @@ class FeatherWriter { private: FeatherWriter() = default; - std::unique_ptr table_writer_; int64_t num_rows_; int64_t num_variables_; std::string description_; + std::shared_ptr file_output_stream_; }; } // namespace matlab } // namespace arrow - diff --git a/matlab/src/featherwritemex.cc b/matlab/src/featherwritemex.cc index 3a6815e02c1..e7d90126490 100644 --- a/matlab/src/featherwritemex.cc +++ b/matlab/src/featherwritemex.cc @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include - #include +#include + #include "feather_writer.h" #include "util/handle_status.h" @@ -32,6 +32,5 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { arrow::matlab::FeatherWriter::Open(filename, &feather_writer)); // Write the Feather file table variables and table metadata from MATLAB. - feather_writer->WriteMetadata(prhs[2]); - arrow::matlab::util::HandleStatus(feather_writer->WriteVariables(prhs[1])); + arrow::matlab::util::HandleStatus(feather_writer->WriteVariables(prhs[1], prhs[2])); } From 0197f9300199cb6637674859645fcfdc113b55b0 Mon Sep 17 00:00:00 2001 From: sgilmore Date: Tue, 11 May 2021 16:12:10 -0400 Subject: [PATCH 03/16] Update featherread C++ code to use the latest Arrow C++ APIs Co-authored-by: Kevin Gurney --- matlab/src/feather_reader.cc | 97 ++++++++++++++++++++++-------------- matlab/src/feather_reader.h | 12 ++--- matlab/src/featherreadmex.cc | 4 +- 3 files changed, 66 insertions(+), 47 deletions(-) diff --git a/matlab/src/feather_reader.cc b/matlab/src/feather_reader.cc index 484c300e0e4..626519db6c4 100644 --- a/matlab/src/feather_reader.cc +++ b/matlab/src/feather_reader.cc @@ -15,19 +15,24 @@ // specific language governing permissions and limitations // under the License. -#include -#include +#include "feather_reader.h" +#include +#include +#include #include #include +#include #include #include #include -#include - +#include +#include #include -#include "feather_reader.h" +#include +#include + #include "matlab_traits.h" #include "util/handle_status.h" #include "util/unicode_conversion.h" @@ -52,11 +57,12 @@ mxArray* ReadNumericVariableData(const std::shared_ptr& column) { mxArray* variable_data = mxCreateNumericMatrix(column->length(), 1, matlab_class_id, mxREAL); - std::shared_ptr integer_array = + std::shared_ptr arrow_numeric_array = std::static_pointer_cast(column); // Get a raw pointer to the Arrow array data. - const MatlabType* source = integer_array->raw_values(); + const MatlabType* source = + reinterpret_cast(arrow_numeric_array->values()->data()); // Get a mutable pointer to the MATLAB array data and std::copy the // Arrow array data into it. @@ -121,8 +127,7 @@ void BitUnpackBuffer(const std::shared_ptr& source, int64_t length, // writes to a zero-initialized destination buffer. // Implements a fast path for the fully-valid and fully-invalid cases. // Returns true if the destination buffer was successfully populated. -bool TryBitUnpackFastPath(const std::shared_ptr& array, - mxLogical* destination) { +bool TryBitUnpackFastPath(const std::shared_ptr& array, mxLogical* destination) { const int64_t null_count = array->null_count(); const int64_t length = array->length(); @@ -177,32 +182,34 @@ Status FeatherReader::Open(const std::string& filename, *feather_reader = std::shared_ptr(new FeatherReader()); // Open file with given filename as a ReadableFile. - std::shared_ptr readable_file(nullptr); - - RETURN_NOT_OK(io::ReadableFile::Open(filename, &readable_file)); + arrow::Result> maybe_readable_file = + io::ReadableFile::Open(filename); + RETURN_NOT_OK(maybe_readable_file); // TableReader expects a RandomAccessFile. - std::shared_ptr random_access_file(readable_file); + std::shared_ptr random_access_file{ + maybe_readable_file.ValueOrDie()}; // Open the Feather file for reading with a TableReader. - RETURN_NOT_OK(ipc::feather::TableReader::Open(random_access_file, - &(*feather_reader)->table_reader_)); - - // Read the table metadata from the Feather file. - (*feather_reader)->num_rows_ = (*feather_reader)->table_reader_->num_rows(); - (*feather_reader)->num_variables_ = (*feather_reader)->table_reader_->num_columns(); - (*feather_reader)->description_ = - (*feather_reader)->table_reader_->HasDescription() - ? (*feather_reader)->table_reader_->GetDescription() - : ""; - - if ((*feather_reader)->num_rows_ > internal::MAX_MATLAB_SIZE || - (*feather_reader)->num_variables_ > internal::MAX_MATLAB_SIZE) { - mexErrMsgIdAndTxt("MATLAB:arrow:SizeTooLarge", - "The table size exceeds MATLAB limits: %u x %u", - (*feather_reader)->num_rows_, (*feather_reader)->num_variables_); + arrow::Result> maybe_reader = + ipc::feather::Reader::Open(random_access_file); + RETURN_NOT_OK(maybe_reader); + + // Set the internal reader_ object. + (*feather_reader)->reader_ = maybe_reader.ValueOrDie(); + std::shared_ptr reader = (*feather_reader)->reader_; + + // Check the feather file version + int version = reader->version(); + if (version == ipc::feather::kFeatherV2Version) { + return Status::NotImplemented("Support for Feather V2 has not been implemented."); + } else if (version != ipc::feather::kFeatherV1Version) { + return Status::Invalid("Unknown Feather format version."); } + // read the table metadata from the Feather file + std::shared_ptr schema = reader->schema(); + (*feather_reader)->num_variables_ = schema->num_fields(); return Status::OK(); } @@ -225,15 +232,11 @@ mxArray* FeatherReader::ReadMetadata() const { mxSetField(metadata, 0, "NumVariables", mxCreateDoubleScalar(static_cast(num_variables_))); - // Set the description. - mxSetField(metadata, 0, "Description", - util::ConvertUTF8StringToUTF16CharMatrix(description_)); - return metadata; } // Read the table variables from the Feather file as a mxArray*. -mxArray* FeatherReader::ReadVariables() const { +mxArray* FeatherReader::ReadVariables() { const int32_t num_variable_fields = 4; const char* fieldnames[] = {"Name", "Type", "Data", "Valid"}; @@ -242,16 +245,34 @@ mxArray* FeatherReader::ReadVariables() const { mxArray* variables = mxCreateStructMatrix(1, num_variables_, num_variable_fields, fieldnames); - // Read all the table variables in the Feather file into memory. + // Read the entire table in the Feather file into memory. + std::shared_ptr table = nullptr; + arrow::Status status = reader_->Read(&table); + if (!status.ok()) { + mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::FailedToReadTable", + "Failed to read arrow::Table from Feather file."); + } + + // Set the number of rows + num_rows_ = table->num_rows(); + + if (num_rows_ > internal::MAX_MATLAB_SIZE || + num_variables_ > internal::MAX_MATLAB_SIZE) { + mexErrMsgIdAndTxt("MATLAB:arrow:SizeTooLarge", + "The table size exceeds MATLAB limits: %u x %u", num_rows_, + num_variables_); + } + + std::vector column_names = table->ColumnNames(); + for (int64_t i = 0; i < num_variables_; ++i) { - std::shared_ptr column; - util::HandleStatus(table_reader_->GetColumn(i, &column)); + std::shared_ptr column = table->column(i); if (column->num_chunks() != 1) { mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::ReadVariables", "Chunked columns not yet supported"); } std::shared_ptr chunk = column->chunk(0); - const std::string column_name = table_reader_->GetColumnName(i); + const std::string column_name = column_names[i]; // set the struct fields data mxSetField(variables, i, "Name", internal::ReadVariableName(column_name)); diff --git a/matlab/src/feather_reader.h b/matlab/src/feather_reader.h index 00fea68f7ae..8824b705e3b 100644 --- a/matlab/src/feather_reader.h +++ b/matlab/src/feather_reader.h @@ -17,15 +17,14 @@ #pragma once -#include -#include - #include #include #include - #include +#include +#include + namespace arrow { namespace matlab { @@ -56,7 +55,7 @@ class FeatherReader { /// Clients are responsible for freeing the returned mxArray memory /// when it is no longer needed, or passing it to MATLAB to be managed. /// \return variables mxArray* struct array containing table variable data - mxArray* ReadVariables() const; + mxArray* ReadVariables(); /// \brief Initialize a FeatherReader object from a given Feather file. /// \param[in] filename path to a Feather file @@ -66,7 +65,7 @@ class FeatherReader { private: FeatherReader() = default; - std::unique_ptr table_reader_; + std::shared_ptr reader_; int64_t num_rows_; int64_t num_variables_; std::string description_; @@ -74,4 +73,3 @@ class FeatherReader { } // namespace matlab } // namespace arrow - diff --git a/matlab/src/featherreadmex.cc b/matlab/src/featherreadmex.cc index b52b8a98f1d..7a5f8d71c9f 100644 --- a/matlab/src/featherreadmex.cc +++ b/matlab/src/featherreadmex.cc @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include - #include +#include + #include "feather_reader.h" #include "util/handle_status.h" From 40bd57171af8091abd2bd35f6297f76170009a09 Mon Sep 17 00:00:00 2001 From: sgilmore Date: Tue, 11 May 2021 16:15:00 -0400 Subject: [PATCH 04/16] Update featherread M code to no longer handle table descriptions. Co-authored-by: Kevin Gurney --- matlab/src/+mlarrow/+util/createMetadataStruct.m | 5 ++--- matlab/src/+mlarrow/+util/table2mlarrow.m | 3 +-- matlab/src/featherread.m | 4 ---- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/matlab/src/+mlarrow/+util/createMetadataStruct.m b/matlab/src/+mlarrow/+util/createMetadataStruct.m index 7a2397059b6..b1b8bc7edd9 100644 --- a/matlab/src/+mlarrow/+util/createMetadataStruct.m +++ b/matlab/src/+mlarrow/+util/createMetadataStruct.m @@ -1,4 +1,4 @@ -function metadata = createMetadataStruct(description, numRows, numVariables) +function metadata = createMetadataStruct(numRows, numVariables) % CREATEMETADATASTRUCT Helper function for creating Feather MEX metadata % struct. @@ -17,8 +17,7 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -metadata = struct('Description', description, ... - 'NumRows', numRows, ... +metadata = struct('NumRows', numRows, ... 'NumVariables', numVariables); end diff --git a/matlab/src/+mlarrow/+util/table2mlarrow.m b/matlab/src/+mlarrow/+util/table2mlarrow.m index 3103724f945..36e4d1d15a9 100644 --- a/matlab/src/+mlarrow/+util/table2mlarrow.m +++ b/matlab/src/+mlarrow/+util/table2mlarrow.m @@ -23,7 +23,6 @@ % % Field Name Class Description % ------------ ------- ---------------------------------------------- -% Description char Table description (T.Properties.Description) % NumRows double Number of table rows (height(T)) % NumVariables double Number of table variables (width(T)) % @@ -51,7 +50,7 @@ variables = repmat(createVariableStruct('', [], [], ''), 1, width(t)); % Struct representing table-level metadata. -metadata = createMetadataStruct(t.Properties.Description, height(t), width(t)); +metadata = createMetadataStruct(height(t), width(t)); % Iterate over each variable in the given table, % extracting the underlying array data. diff --git a/matlab/src/featherread.m b/matlab/src/featherread.m index 4ac8a565182..31bc426b877 100644 --- a/matlab/src/featherread.m +++ b/matlab/src/featherread.m @@ -83,8 +83,4 @@ t.Properties.VariableDescriptions = cellstr(variableDescriptions); end -% Set the Description property of the table based on the Feather file -% description. -t.Properties.Description = metadata.Description; - end From 7a4b2a7d69657e8962574540a28e30f25a5f73fa Mon Sep 17 00:00:00 2001 From: sgilmore Date: Tue, 11 May 2021 16:17:53 -0400 Subject: [PATCH 05/16] Update tfeathermex tests to react to arrow C++ API changes Co-authored-by: Kevin Gurney --- matlab/test/tfeathermex.m | 2 +- matlab/test/util/createVariablesAndMetadataStructs.m | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/matlab/test/tfeathermex.m b/matlab/test/tfeathermex.m index fa79b4bdef0..77070ad1421 100644 --- a/matlab/test/tfeathermex.m +++ b/matlab/test/tfeathermex.m @@ -60,7 +60,7 @@ function InvalidMATLABTableVariableNames(testCase) invalidVariable = mlarrow.util.createVariableStruct('double', 1, true, '@'); validVariable = mlarrow.util.createVariableStruct('double', 1, true, 'Valid'); variables = [invalidVariable, validVariable]; - metadata = mlarrow.util.createMetadataStruct('', 1, 2); + metadata = mlarrow.util.createMetadataStruct(1, 2); featherwritemex(filename, variables, metadata); t = featherread(filename); diff --git a/matlab/test/util/createVariablesAndMetadataStructs.m b/matlab/test/util/createVariablesAndMetadataStructs.m index 01a8f58261b..0c60cbfbbcc 100644 --- a/matlab/test/util/createVariablesAndMetadataStructs.m +++ b/matlab/test/util/createVariablesAndMetadataStructs.m @@ -90,9 +90,8 @@ singleVariable, ... doubleVariable]; -description = 'test'; numRows = 3; numVariables = length(variables); -metadata = createMetadataStruct(description, numRows, numVariables); +metadata = createMetadataStruct(numRows, numVariables); end From 77a71f458bda1e5f21f1b1d6cc4197ff12547bc5 Mon Sep 17 00:00:00 2001 From: sgilmore Date: Mon, 17 May 2021 12:59:00 -0400 Subject: [PATCH 06/16] Fix CMakeLists.txt formatting --- matlab/CMakeLists.txt | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt index 5e7687ceb96..16b61380be9 100644 --- a/matlab/CMakeLists.txt +++ b/matlab/CMakeLists.txt @@ -39,33 +39,42 @@ find_package(Arrow REQUIRED) find_package(Matlab REQUIRED) # Construct the absolute path to featherread's source files -set(featherread_sources featherreadmex.cc feather_reader.cc - util/handle_status.cc util/unicode_conversion.cc) +set(featherread_sources featherreadmex.cc feather_reader.cc util/handle_status.cc + util/unicode_conversion.cc) list(TRANSFORM featherread_sources PREPEND ${CMAKE_SOURCE_DIR}/src/) # Build featherreadmex MEX binary matlab_add_mex(R2018a - NAME featherreadmex - SRC ${featherread_sources} - LINK_TO arrow_shared) + NAME + featherreadmex + SRC + ${featherread_sources} + LINK_TO + arrow_shared) # Construct the absolute path to featherwrite's source files -set(featherwrite_sources featherwritemex.cc feather_writer.cc - util/handle_status.cc util/unicode_conversion.cc) +set(featherwrite_sources featherwritemex.cc feather_writer.cc util/handle_status.cc + util/unicode_conversion.cc) list(TRANSFORM featherwrite_sources PREPEND ${CMAKE_SOURCE_DIR}/src/) - # Build featherwritemex MEX binary matlab_add_mex(R2018a - NAME featherwritemex - SRC ${featherwrite_sources} - LINK_TO arrow_shared) + NAME + featherwritemex + SRC + ${featherwrite_sources} + LINK_TO + arrow_shared) # Ensure the MEX binaries are placed in the src directory on all platforms -if (WIN32) - set_target_properties(featherreadmex PROPERTIES RUNTIME_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) - set_target_properties(featherwritemex PROPERTIES RUNTIME_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) +if(WIN32) + set_target_properties(featherreadmex + PROPERTIES RUNTIME_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) + set_target_properties(featherwritemex + PROPERTIES RUNTIME_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) else() - set_target_properties(featherreadmex PROPERTIES LIBRARY_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) - set_target_properties(featherwritemex PROPERTIES LIBRARY_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) + set_target_properties(featherreadmex + PROPERTIES LIBRARY_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) + set_target_properties(featherwritemex + PROPERTIES LIBRARY_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) endif() From d03995549b1f81b91723eff1a6976fae4e4fc5a4 Mon Sep 17 00:00:00 2001 From: sgilmore Date: Wed, 23 Jun 2021 12:43:20 -0400 Subject: [PATCH 07/16] React to code review feedback on PR #10305 --- matlab/src/feather_reader.cc | 22 ++++++++++------------ matlab/src/feather_reader.h | 6 +++--- matlab/src/feather_writer.cc | 21 ++++++++------------- matlab/src/feather_writer.h | 6 +++--- matlab/src/featherreadmex.cc | 4 ++-- matlab/src/featherwritemex.cc | 4 ++-- 6 files changed, 28 insertions(+), 35 deletions(-) diff --git a/matlab/src/feather_reader.cc b/matlab/src/feather_reader.cc index 626519db6c4..0a48c675fd7 100644 --- a/matlab/src/feather_reader.cc +++ b/matlab/src/feather_reader.cc @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +#include +#include + #include "feather_reader.h" #include @@ -30,9 +33,6 @@ #include #include -#include -#include - #include "matlab_traits.h" #include "util/handle_status.h" #include "util/unicode_conversion.h" @@ -184,20 +184,18 @@ Status FeatherReader::Open(const std::string& filename, // Open file with given filename as a ReadableFile. arrow::Result> maybe_readable_file = io::ReadableFile::Open(filename); - RETURN_NOT_OK(maybe_readable_file); // TableReader expects a RandomAccessFile. - std::shared_ptr random_access_file{ - maybe_readable_file.ValueOrDie()}; + ARROW_ASSIGN_OR_RAISE(std::shared_ptr random_access_file, + maybe_readable_file); // Open the Feather file for reading with a TableReader. arrow::Result> maybe_reader = ipc::feather::Reader::Open(random_access_file); - RETURN_NOT_OK(maybe_reader); + ARROW_ASSIGN_OR_RAISE(auto reader, maybe_reader); // Set the internal reader_ object. - (*feather_reader)->reader_ = maybe_reader.ValueOrDie(); - std::shared_ptr reader = (*feather_reader)->reader_; + (*feather_reader)->reader_ = reader; // Check the feather file version int version = reader->version(); @@ -245,12 +243,12 @@ mxArray* FeatherReader::ReadVariables() { mxArray* variables = mxCreateStructMatrix(1, num_variables_, num_variable_fields, fieldnames); - // Read the entire table in the Feather file into memory. std::shared_ptr table = nullptr; arrow::Status status = reader_->Read(&table); if (!status.ok()) { - mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::FailedToReadTable", - "Failed to read arrow::Table from Feather file."); + std::string err_msg = + "Failed to read arrow::Table from Feather file. Reason: " + status.message(); + mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::FailedToReadTable", err_msg.c_str()); } // Set the number of rows diff --git a/matlab/src/feather_reader.h b/matlab/src/feather_reader.h index 8824b705e3b..197e470bf6e 100644 --- a/matlab/src/feather_reader.h +++ b/matlab/src/feather_reader.h @@ -17,14 +17,14 @@ #pragma once +#include +#include + #include #include #include #include -#include -#include - namespace arrow { namespace matlab { diff --git a/matlab/src/feather_writer.cc b/matlab/src/feather_writer.cc index a2f4e13dcf5..87b27ee7b19 100644 --- a/matlab/src/feather_writer.cc +++ b/matlab/src/feather_writer.cc @@ -15,6 +15,10 @@ // specific language governing permissions and limitations // under the License. +#include +#include /* for std::multiplies */ +#include /* for std::accumulate */ + #include "feather_writer.h" #include @@ -29,10 +33,6 @@ #include #include -#include -#include /* for std::multiplies */ -#include /* for std::accumulate */ - #include "matlab_traits.h" #include "util/handle_status.h" @@ -281,9 +281,7 @@ Status FeatherWriter::Open(const std::string& filename, // Open a FileOutputStream corresponding to the provided filename. arrow::Result> maybe_file_output_stream = io::FileOutputStream::Open(filename, &((*feather_writer)->file_output_stream_)); - RETURN_NOT_OK(maybe_file_output_stream); - (*feather_writer)->file_output_stream_ = maybe_file_output_stream.ValueOrDie(); - + ARROW_ASSIGN_OR_RAISE((*feather_writer)->file_output_stream_, maybe_file_output_stream); return Status::OK(); } @@ -338,8 +336,7 @@ Status FeatherWriter::WriteVariables(const mxArray* variables, const mxArray* me arrow::Result> maybe_buffer = arrow::AllocateResizableBuffer(internal::BitPackedLength(num_rows_)); - RETURN_NOT_OK(maybe_buffer); - std::shared_ptr validity_bitmap = maybe_buffer.ValueOrDie(); + ARROW_ASSIGN_OR_RAISE(auto validity_bitmap, maybe_buffer); // Populate bit-packed arrow::Buffer using validity data in the mxArray*. internal::BitPackBuffer(valid, validity_bitmap); @@ -358,11 +355,9 @@ Status FeatherWriter::WriteVariables(const mxArray* variables, const mxArray* me table_columns.push_back(array); } // Create the table schema - arrow::Result> table_schema_result = + arrow::Result> maybe_table_schema = schema_builder.Finish(); - RETURN_NOT_OK(table_schema_result); - - std::shared_ptr table_schema = table_schema_result.ValueOrDie(); + ARROW_ASSIGN_OR_RAISE(auto table_schema, maybe_table_schema); // Specify the feather file format version as V1 arrow::ipc::feather::WriteProperties write_props; diff --git a/matlab/src/feather_writer.h b/matlab/src/feather_writer.h index 45fbe5294e2..a35b1434340 100644 --- a/matlab/src/feather_writer.h +++ b/matlab/src/feather_writer.h @@ -17,14 +17,14 @@ #pragma once +#include +#include + #include #include #include #include -#include -#include - namespace arrow { namespace matlab { diff --git a/matlab/src/featherreadmex.cc b/matlab/src/featherreadmex.cc index 7a5f8d71c9f..b52b8a98f1d 100644 --- a/matlab/src/featherreadmex.cc +++ b/matlab/src/featherreadmex.cc @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include - #include +#include + #include "feather_reader.h" #include "util/handle_status.h" diff --git a/matlab/src/featherwritemex.cc b/matlab/src/featherwritemex.cc index e7d90126490..d8f90baafc5 100644 --- a/matlab/src/featherwritemex.cc +++ b/matlab/src/featherwritemex.cc @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include - #include +#include + #include "feather_writer.h" #include "util/handle_status.h" From 36201e7b0d40ebc6e0d8f61f32332c2df32b0f47 Mon Sep 17 00:00:00 2001 From: sgilmore Date: Wed, 23 Jun 2021 21:35:05 -0400 Subject: [PATCH 08/16] Fix CMakeLists.txt formatting --- matlab/CMakeLists.txt | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt index 16b61380be9..5ee48a87c3a 100644 --- a/matlab/CMakeLists.txt +++ b/matlab/CMakeLists.txt @@ -45,12 +45,9 @@ list(TRANSFORM featherread_sources PREPEND ${CMAKE_SOURCE_DIR}/src/) # Build featherreadmex MEX binary matlab_add_mex(R2018a - NAME - featherreadmex - SRC - ${featherread_sources} - LINK_TO - arrow_shared) + NAME featherreadmex + SRC ${featherread_sources} + LINK_TO arrow_shared) # Construct the absolute path to featherwrite's source files set(featherwrite_sources featherwritemex.cc feather_writer.cc util/handle_status.cc @@ -59,22 +56,19 @@ list(TRANSFORM featherwrite_sources PREPEND ${CMAKE_SOURCE_DIR}/src/) # Build featherwritemex MEX binary matlab_add_mex(R2018a - NAME - featherwritemex - SRC - ${featherwrite_sources} - LINK_TO - arrow_shared) + NAME featherwritemex + SRC ${featherwrite_sources} + LINK_TO arrow_shared) # Ensure the MEX binaries are placed in the src directory on all platforms if(WIN32) - set_target_properties(featherreadmex - PROPERTIES RUNTIME_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) - set_target_properties(featherwritemex - PROPERTIES RUNTIME_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) + set_target_properties(featherreadmex PROPERTIES RUNTIME_OUTPUT_DIRECTORY + $<1:${CMAKE_SOURCE_DIR}/src>) + set_target_properties(featherwritemex PROPERTIES RUNTIME_OUTPUT_DIRECTORY + $<1:${CMAKE_SOURCE_DIR}/src>) else() - set_target_properties(featherreadmex - PROPERTIES LIBRARY_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) - set_target_properties(featherwritemex - PROPERTIES LIBRARY_OUTPUT_DIRECTORY $<1:${CMAKE_SOURCE_DIR}/src>) + set_target_properties(featherreadmex PROPERTIES LIBRARY_OUTPUT_DIRECTORY + $<1:${CMAKE_SOURCE_DIR}/src>) + set_target_properties(featherwritemex PROPERTIES LIBRARY_OUTPUT_DIRECTORY + $<1:${CMAKE_SOURCE_DIR}/src>) endif() From c86c5da8f17f473803d2045bf5bc10f8dc6602db Mon Sep 17 00:00:00 2001 From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Date: Fri, 25 Jun 2021 09:49:32 -0400 Subject: [PATCH 09/16] Update matlab/src/feather_reader.cc Co-authored-by: Sutou Kouhei --- matlab/src/feather_reader.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matlab/src/feather_reader.cc b/matlab/src/feather_reader.cc index 0a48c675fd7..f3a03eabba6 100644 --- a/matlab/src/feather_reader.cc +++ b/matlab/src/feather_reader.cc @@ -57,7 +57,7 @@ mxArray* ReadNumericVariableData(const std::shared_ptr& column) { mxArray* variable_data = mxCreateNumericMatrix(column->length(), 1, matlab_class_id, mxREAL); - std::shared_ptr arrow_numeric_array = + auto arrow_numeric_array = std::static_pointer_cast(column); // Get a raw pointer to the Arrow array data. From bbd0ebe202c77a9f7249c7b9378537aac2321c1e Mon Sep 17 00:00:00 2001 From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Date: Fri, 25 Jun 2021 09:50:21 -0400 Subject: [PATCH 10/16] Update matlab/src/feather_reader.cc Co-authored-by: Sutou Kouhei --- matlab/src/feather_reader.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matlab/src/feather_reader.cc b/matlab/src/feather_reader.cc index f3a03eabba6..9501ba12259 100644 --- a/matlab/src/feather_reader.cc +++ b/matlab/src/feather_reader.cc @@ -243,7 +243,7 @@ mxArray* FeatherReader::ReadVariables() { mxArray* variables = mxCreateStructMatrix(1, num_variables_, num_variable_fields, fieldnames); - std::shared_ptr table = nullptr; + std::shared_ptr table; arrow::Status status = reader_->Read(&table); if (!status.ok()) { std::string err_msg = From d1f730beb989b1953d93bbacbc4e4c7fd0694f06 Mon Sep 17 00:00:00 2001 From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Date: Fri, 25 Jun 2021 09:51:05 -0400 Subject: [PATCH 11/16] Update matlab/src/feather_reader.cc Co-authored-by: Sutou Kouhei --- matlab/src/feather_reader.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/matlab/src/feather_reader.cc b/matlab/src/feather_reader.cc index 9501ba12259..3f65ed9f50a 100644 --- a/matlab/src/feather_reader.cc +++ b/matlab/src/feather_reader.cc @@ -182,8 +182,7 @@ Status FeatherReader::Open(const std::string& filename, *feather_reader = std::shared_ptr(new FeatherReader()); // Open file with given filename as a ReadableFile. - arrow::Result> maybe_readable_file = - io::ReadableFile::Open(filename); + ARROW_ASSIGN_OR_RAISE(auto readable_file, io::ReadableFile::Open(filename)); // TableReader expects a RandomAccessFile. ARROW_ASSIGN_OR_RAISE(std::shared_ptr random_access_file, From 24af5115512ff5e31fc0d5451c9ae284539db89b Mon Sep 17 00:00:00 2001 From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Date: Fri, 25 Jun 2021 09:51:28 -0400 Subject: [PATCH 12/16] Update matlab/src/feather_writer.cc Co-authored-by: Sutou Kouhei --- matlab/src/feather_writer.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/matlab/src/feather_writer.cc b/matlab/src/feather_writer.cc index 87b27ee7b19..3ee28d1cc6f 100644 --- a/matlab/src/feather_writer.cc +++ b/matlab/src/feather_writer.cc @@ -355,9 +355,7 @@ Status FeatherWriter::WriteVariables(const mxArray* variables, const mxArray* me table_columns.push_back(array); } // Create the table schema - arrow::Result> maybe_table_schema = - schema_builder.Finish(); - ARROW_ASSIGN_OR_RAISE(auto table_schema, maybe_table_schema); + ARROW_ASSIGN_OR_RAISE(auto table_schema, schema_builder.Finish()); // Specify the feather file format version as V1 arrow::ipc::feather::WriteProperties write_props; From 45b184297891d444564bafee49f7a6792f44007b Mon Sep 17 00:00:00 2001 From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Date: Fri, 25 Jun 2021 09:52:00 -0400 Subject: [PATCH 13/16] Update matlab/src/feather_writer.cc Co-authored-by: Sutou Kouhei --- matlab/src/feather_writer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matlab/src/feather_writer.cc b/matlab/src/feather_writer.cc index 3ee28d1cc6f..889ad8dde31 100644 --- a/matlab/src/feather_writer.cc +++ b/matlab/src/feather_writer.cc @@ -342,7 +342,7 @@ Status FeatherWriter::WriteVariables(const mxArray* variables, const mxArray* me internal::BitPackBuffer(valid, validity_bitmap); // Wrap mxArray data in an arrow::Array of the equivalent type. - std::shared_ptr array = + auto array = internal::WriteVariableData(data, type_str, validity_bitmap); // Verify that the arrow::Array has the right number of elements. From 338a09ad95e369d6516ac8df7beb19937163cb17 Mon Sep 17 00:00:00 2001 From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Date: Fri, 25 Jun 2021 10:18:52 -0400 Subject: [PATCH 14/16] Apply suggestions from code review Co-authored-by: Sutou Kouhei --- matlab/src/feather_reader.cc | 17 ++++++++--------- matlab/src/feather_writer.cc | 12 ++++-------- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/matlab/src/feather_reader.cc b/matlab/src/feather_reader.cc index 3f65ed9f50a..cdc0278a3ce 100644 --- a/matlab/src/feather_reader.cc +++ b/matlab/src/feather_reader.cc @@ -197,7 +197,7 @@ Status FeatherReader::Open(const std::string& filename, (*feather_reader)->reader_ = reader; // Check the feather file version - int version = reader->version(); + auto version = reader->version(); if (version == ipc::feather::kFeatherV2Version) { return Status::NotImplemented("Support for Feather V2 has not been implemented."); } else if (version != ipc::feather::kFeatherV1Version) { @@ -205,8 +205,7 @@ Status FeatherReader::Open(const std::string& filename, } // read the table metadata from the Feather file - std::shared_ptr schema = reader->schema(); - (*feather_reader)->num_variables_ = schema->num_fields(); + (*feather_reader)->num_variables_ = reader->schema()->num_fields(); return Status::OK(); } @@ -243,11 +242,11 @@ mxArray* FeatherReader::ReadVariables() { mxCreateStructMatrix(1, num_variables_, num_variable_fields, fieldnames); std::shared_ptr table; - arrow::Status status = reader_->Read(&table); + auto status = reader_->Read(&table); if (!status.ok()) { - std::string err_msg = - "Failed to read arrow::Table from Feather file. Reason: " + status.message(); - mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::FailedToReadTable", err_msg.c_str()); + mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::FailedToReadTable", + "Failed to read arrow::Table from Feather file. Reason: %s", + status.message()); } // Set the number of rows @@ -260,10 +259,10 @@ mxArray* FeatherReader::ReadVariables() { num_variables_); } - std::vector column_names = table->ColumnNames(); + auto column_names = table->ColumnNames(); for (int64_t i = 0; i < num_variables_; ++i) { - std::shared_ptr column = table->column(i); + auto column = table->column(i); if (column->num_chunks() != 1) { mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::ReadVariables", "Chunked columns not yet supported"); diff --git a/matlab/src/feather_writer.cc b/matlab/src/feather_writer.cc index 889ad8dde31..07274b32fb5 100644 --- a/matlab/src/feather_writer.cc +++ b/matlab/src/feather_writer.cc @@ -279,9 +279,8 @@ Status FeatherWriter::Open(const std::string& filename, *feather_writer = std::shared_ptr(new FeatherWriter()); // Open a FileOutputStream corresponding to the provided filename. - arrow::Result> maybe_file_output_stream = + ARROW_ASSIGN_OR_RAISE((*feather_writer)->file_output_stream_, io::FileOutputStream::Open(filename, &((*feather_writer)->file_output_stream_)); - ARROW_ASSIGN_OR_RAISE((*feather_writer)->file_output_stream_, maybe_file_output_stream); return Status::OK(); } @@ -329,14 +328,11 @@ Status FeatherWriter::WriteVariables(const mxArray* variables, const mxArray* me std::string name_str = internal::MxArrayToString(name); std::string type_str = internal::MxArrayToString(type); - std::shared_ptr datatype = - internal::ConvertMatlabTypeStringToArrowDataType(type_str); - std::shared_ptr field = - std::make_shared(name_str, datatype); + auto datatype = internal::ConvertMatlabTypeStringToArrowDataType(type_str); + auto field = std::make_shared(name_str, datatype); - arrow::Result> maybe_buffer = + ARROW_ASSIGN_OR_RAISE(auto validity_bitmap, arrow::AllocateResizableBuffer(internal::BitPackedLength(num_rows_)); - ARROW_ASSIGN_OR_RAISE(auto validity_bitmap, maybe_buffer); // Populate bit-packed arrow::Buffer using validity data in the mxArray*. internal::BitPackBuffer(valid, validity_bitmap); From 141a37dab870946504dae263f1c1e067b68bdd1d Mon Sep 17 00:00:00 2001 From: sgilmore Date: Fri, 25 Jun 2021 10:32:57 -0400 Subject: [PATCH 15/16] Fix build failures --- matlab/src/feather_reader.cc | 16 +++++----------- matlab/src/feather_writer.cc | 8 ++++---- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/matlab/src/feather_reader.cc b/matlab/src/feather_reader.cc index cdc0278a3ce..8a325c4b10d 100644 --- a/matlab/src/feather_reader.cc +++ b/matlab/src/feather_reader.cc @@ -182,17 +182,11 @@ Status FeatherReader::Open(const std::string& filename, *feather_reader = std::shared_ptr(new FeatherReader()); // Open file with given filename as a ReadableFile. - ARROW_ASSIGN_OR_RAISE(auto readable_file, io::ReadableFile::Open(filename)); - - // TableReader expects a RandomAccessFile. - ARROW_ASSIGN_OR_RAISE(std::shared_ptr random_access_file, - maybe_readable_file); - + ARROW_ASSIGN_OR_RAISE(auto readable_file, io::ReadableFile::Open(filename)); + // Open the Feather file for reading with a TableReader. - arrow::Result> maybe_reader = - ipc::feather::Reader::Open(random_access_file); - ARROW_ASSIGN_OR_RAISE(auto reader, maybe_reader); - + ARROW_ASSIGN_OR_RAISE(auto reader, ipc::feather::Reader::Open(readable_file)); + // Set the internal reader_ object. (*feather_reader)->reader_ = reader; @@ -246,7 +240,7 @@ mxArray* FeatherReader::ReadVariables() { if (!status.ok()) { mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::FailedToReadTable", "Failed to read arrow::Table from Feather file. Reason: %s", - status.message()); + status.message().c_str()); } // Set the number of rows diff --git a/matlab/src/feather_writer.cc b/matlab/src/feather_writer.cc index 07274b32fb5..1a76ada1995 100644 --- a/matlab/src/feather_writer.cc +++ b/matlab/src/feather_writer.cc @@ -280,7 +280,7 @@ Status FeatherWriter::Open(const std::string& filename, // Open a FileOutputStream corresponding to the provided filename. ARROW_ASSIGN_OR_RAISE((*feather_writer)->file_output_stream_, - io::FileOutputStream::Open(filename, &((*feather_writer)->file_output_stream_)); + io::FileOutputStream::Open(filename, &((*feather_writer)->file_output_stream_))); return Status::OK(); } @@ -331,8 +331,8 @@ Status FeatherWriter::WriteVariables(const mxArray* variables, const mxArray* me auto datatype = internal::ConvertMatlabTypeStringToArrowDataType(type_str); auto field = std::make_shared(name_str, datatype); - ARROW_ASSIGN_OR_RAISE(auto validity_bitmap, - arrow::AllocateResizableBuffer(internal::BitPackedLength(num_rows_)); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr validity_bitmap, + arrow::AllocateResizableBuffer(internal::BitPackedLength(num_rows_))); // Populate bit-packed arrow::Buffer using validity data in the mxArray*. internal::BitPackBuffer(valid, validity_bitmap); @@ -348,7 +348,7 @@ Status FeatherWriter::WriteVariables(const mxArray* variables, const mxArray* me RETURN_NOT_OK(schema_builder.AddField(field)); // Store the table column - table_columns.push_back(array); + table_columns.push_back(std::move(array)); } // Create the table schema ARROW_ASSIGN_OR_RAISE(auto table_schema, schema_builder.Finish()); From e3e40f65d27558b34aae60c69d5f77f8f5969085 Mon Sep 17 00:00:00 2001 From: sgilmore Date: Fri, 25 Jun 2021 10:45:17 -0400 Subject: [PATCH 16/16] Access the raw data viaraw_values() instead of values() --- matlab/src/feather_reader.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/matlab/src/feather_reader.cc b/matlab/src/feather_reader.cc index 8a325c4b10d..1cbb50541e7 100644 --- a/matlab/src/feather_reader.cc +++ b/matlab/src/feather_reader.cc @@ -61,8 +61,7 @@ mxArray* ReadNumericVariableData(const std::shared_ptr& column) { std::static_pointer_cast(column); // Get a raw pointer to the Arrow array data. - const MatlabType* source = - reinterpret_cast(arrow_numeric_array->values()->data()); + const MatlabType* source = arrow_numeric_array->raw_values(); // Get a mutable pointer to the MATLAB array data and std::copy the // Arrow array data into it.