From 46274a9518861670fe0ec0ecc4c0538545589e91 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Fri, 4 Aug 2023 15:56:49 -0400 Subject: [PATCH 01/11] Add basic internal MATLAB Feather V1 Reader class. --- .../+arrow/+internal/+io/+feather/Reader.m | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m b/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m new file mode 100644 index 00000000000..b5abc385e5e --- /dev/null +++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m @@ -0,0 +1,64 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef Reader +%READER An internal Reader object for reading Feather files. + + properties (GetAccess=public, SetAccess=private, Hidden) + Proxy + end + + properties (Dependent, SetAccess=private, GetAccess=public) + % Name of the file to read. + Filename + end + + methods + + function obj = Reader(filename) + filename = convertCharsToStrings(filename); + if ~(isstring(filename) && isscalar(filename)) + error("arrow:io:feather:FilenameUnsupportedType", "Filename must be a scalar string or char row vector."); + end + obj.Filename = filename; + end + + function T = read(obj) + args = struct(Filename=obj.Filename); + recordBatchProxyID = obj.Proxy.read(args); + proxy = libmexclass.proxy.Proxy(Name="arrow.tabular.proxy.RecordBatch", ID=recordBatchProxyID); + recordBatch = arrow.tabular.RecordBatch(proxy); + T = recordBatch.toMATLAB(); + end + + end + + methods (Access = private) + + function str = toString(obj) + str = obj.Proxy.toString(); + end + + end + + methods (Access=protected) + + function displayScalarObject(obj) + disp(obj.toString()); + end + + end + +end \ No newline at end of file From f343ab549cb8ac1dbb5cbfd0843d71bd64dcf8cd Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Fri, 4 Aug 2023 16:00:09 -0400 Subject: [PATCH 02/11] Register Feather Reader Proxy with Proxy Factory. --- matlab/src/cpp/arrow/matlab/proxy/factory.cc | 2 ++ matlab/tools/cmake/BuildMatlabArrowInterface.cmake | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 7a2a4f3192f..29d82bce00b 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -26,6 +26,7 @@ #include "arrow/matlab/type/proxy/timestamp_type.h" #include "arrow/matlab/type/proxy/field.h" #include "arrow/matlab/io/feather/proxy/feather_writer.h" +#include "arrow/matlab/io/feather/proxy/reader.h" #include "factory.h" @@ -62,6 +63,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name, REGISTER_PROXY(arrow.type.proxy.StringType , arrow::matlab::type::proxy::StringType); REGISTER_PROXY(arrow.type.proxy.TimestampType , arrow::matlab::type::proxy::TimestampType); REGISTER_PROXY(arrow.io.feather.proxy.FeatherWriter , arrow::matlab::io::feather::proxy::FeatherWriter); + REGISTER_PROXY(arrow.io.feather.proxy.Reader , arrow::io::feather::proxy::Reader); return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name}; }; diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index 1d579994176..42b63afb386 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -56,8 +56,8 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/field.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/wrap.cc" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc") - + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/reader.cc") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy") From 9d7d1d86732e9b18610efd2600211fa89ea2bf30 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Fri, 4 Aug 2023 16:11:08 -0400 Subject: [PATCH 03/11] Add basic implementation of Feather Reader Proxy in C++. --- .../arrow/matlab/io/feather/proxy/reader.cc | 46 +++++++++++++++++++ .../arrow/matlab/io/feather/proxy/reader.h | 38 +++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc create mode 100644 matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc new file mode 100644 index 00000000000..675f4e77e8a --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc @@ -0,0 +1,46 @@ +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/matlab/error/error.h" +#include "arrow/matlab/io/feather/proxy/reader.h" + +#include "arrow/util/utf8.h" + +#include + +namespace arrow::matlab::io::feather::proxy { + + Reader::Reader(const std::string& filename) : filename{filename} { + REGISTER_METHOD(Reader, read); + } + + libmexclass::proxy::MakeResult Reader::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + namespace mda = ::matlab::data; + using ReaderProxy = arrow::matlab::io::feather::proxy::Schema; + + mda::StructArray args = constructor_arguments[0]; + const mda::StringArray filename_utf16_mda = args[0]["Filename"]; + const auto filename_utf16 = std::u16string(filename_utf16_mda[0]); + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto filename, arrow::util::UTF16StringToUTF8(filename_utf16), context, error::UNICODE_CONVERSION_ERROR_ID); + + return std::make_shared(filename); + } + + void Reader::read(libmexclass::proxy::method::Context& context) { + std::cout << "Test" << std::endl; + } + +} diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h new file mode 100644 index 00000000000..1fc8dbf4041 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::tabular::proxy { + + class Reader : public libmexclass::proxy::Proxy { + public: + Reader(const std::string& filename); + + virtual ~Reader() {} + + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + void read(libmexclass::proxy::method::Context& context); + + const std::string filename; + }; + +} From 7d3f2a1a21f5ca994533b93ae1cb8ea8f943837a Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Fri, 4 Aug 2023 16:35:19 -0400 Subject: [PATCH 04/11] Implement support for Filename property. --- .../arrow/matlab/io/feather/proxy/reader.cc | 16 ++++++++++-- .../arrow/matlab/io/feather/proxy/reader.h | 3 ++- matlab/src/cpp/arrow/matlab/proxy/factory.cc | 2 +- .../+arrow/+internal/+io/+feather/Reader.m | 26 +++++-------------- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc index 675f4e77e8a..0bbfecf82b2 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc @@ -19,22 +19,25 @@ #include "arrow/util/utf8.h" +#include "arrow/result.h" + #include namespace arrow::matlab::io::feather::proxy { Reader::Reader(const std::string& filename) : filename{filename} { REGISTER_METHOD(Reader, read); + REGISTER_METHOD(Reader, getFilename); } libmexclass::proxy::MakeResult Reader::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { namespace mda = ::matlab::data; - using ReaderProxy = arrow::matlab::io::feather::proxy::Schema; + using ReaderProxy = arrow::matlab::io::feather::proxy::Reader; mda::StructArray args = constructor_arguments[0]; const mda::StringArray filename_utf16_mda = args[0]["Filename"]; const auto filename_utf16 = std::u16string(filename_utf16_mda[0]); - MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto filename, arrow::util::UTF16StringToUTF8(filename_utf16), context, error::UNICODE_CONVERSION_ERROR_ID); + MATLAB_ASSIGN_OR_ERROR(const auto filename, arrow::util::UTF16StringToUTF8(filename_utf16), error::UNICODE_CONVERSION_ERROR_ID); return std::make_shared(filename); } @@ -43,4 +46,13 @@ namespace arrow::matlab::io::feather::proxy { std::cout << "Test" << std::endl; } + void Reader::getFilename(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::ArrayFactory factory; + + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto filename_utf16, arrow::util::UTF8StringToUTF16(filename), context, error::UNICODE_CONVERSION_ERROR_ID); + auto filename_utf16_mda = factory.createScalar(filename_utf16); + context.outputs[0] = filename_utf16_mda; + } + } diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h index 1fc8dbf4041..fb6c06de863 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h @@ -19,7 +19,7 @@ #include "libmexclass/proxy/Proxy.h" -namespace arrow::matlab::tabular::proxy { +namespace arrow::matlab::io::feather::proxy { class Reader : public libmexclass::proxy::Proxy { public: @@ -31,6 +31,7 @@ namespace arrow::matlab::tabular::proxy { protected: void read(libmexclass::proxy::method::Context& context); + void getFilename(libmexclass::proxy::method::Context& context); const std::string filename; }; diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 29d82bce00b..92cce491a83 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -63,7 +63,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name, REGISTER_PROXY(arrow.type.proxy.StringType , arrow::matlab::type::proxy::StringType); REGISTER_PROXY(arrow.type.proxy.TimestampType , arrow::matlab::type::proxy::TimestampType); REGISTER_PROXY(arrow.io.feather.proxy.FeatherWriter , arrow::matlab::io::feather::proxy::FeatherWriter); - REGISTER_PROXY(arrow.io.feather.proxy.Reader , arrow::io::feather::proxy::Reader); + REGISTER_PROXY(arrow.io.feather.proxy.Reader , arrow::matlab::io::feather::proxy::Reader); return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name}; }; diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m b/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m index b5abc385e5e..80ba1ca5552 100644 --- a/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m +++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m @@ -32,33 +32,21 @@ if ~(isstring(filename) && isscalar(filename)) error("arrow:io:feather:FilenameUnsupportedType", "Filename must be a scalar string or char row vector."); end - obj.Filename = filename; + args = struct(Filename=filename); + obj.Proxy = arrow.internal.proxy.create("arrow.io.feather.proxy.Reader", args); end function T = read(obj) - args = struct(Filename=obj.Filename); - recordBatchProxyID = obj.Proxy.read(args); + recordBatchProxyID = obj.Proxy.read(); proxy = libmexclass.proxy.Proxy(Name="arrow.tabular.proxy.RecordBatch", ID=recordBatchProxyID); recordBatch = arrow.tabular.RecordBatch(proxy); T = recordBatch.toMATLAB(); end - - end - - methods (Access = private) - - function str = toString(obj) - str = obj.Proxy.toString(); - end - end - - methods (Access=protected) - - function displayScalarObject(obj) - disp(obj.toString()); + function filename = get.Filename(obj) + filename = obj.Proxy.getFilename(); end - + end -end \ No newline at end of file +end From b65fac2256f11d5eec4c63425f89d8622208b60e Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Fri, 4 Aug 2023 17:06:45 -0400 Subject: [PATCH 05/11] Implement reading logic in C++. --- .../arrow/matlab/io/feather/proxy/reader.cc | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc index 0bbfecf82b2..080b2da9418 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc @@ -14,14 +14,19 @@ // specific language governing permissions and limitations // under the License. +#include "libmexclass/proxy/ProxyManager.h" + #include "arrow/matlab/error/error.h" #include "arrow/matlab/io/feather/proxy/reader.h" +#include "arrow/matlab/tabular/proxy/record_batch.h" #include "arrow/util/utf8.h" #include "arrow/result.h" -#include +#include "arrow/io/file.h" +#include "arrow/ipc/feather.h" +#include "arrow/table.h" namespace arrow::matlab::io::feather::proxy { @@ -43,14 +48,35 @@ namespace arrow::matlab::io::feather::proxy { } void Reader::read(libmexclass::proxy::method::Context& context) { - std::cout << "Test" << std::endl; + namespace mda = ::matlab::data; + using namespace libmexclass::proxy; + using RecordBatchProxy = arrow::matlab::tabular::proxy::RecordBatch; + + // Create a file input stream. + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto source, arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool()), context, "test"); + // Create a Reader. + // TODO: Create an error ID. + // TODO: Error if Feather V2. + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto reader, arrow::ipc::feather::Reader::Open(source), context, "test"); + std::shared_ptr table = nullptr; + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(reader->Read(&table), context, "test"); + arrow::TableBatchReader table_batch_reader{table}; + std::shared_ptr record_batch = nullptr; + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(table_batch_reader.ReadNext(&record_batch), context, "test"); + auto record_batch_proxy = std::make_shared(record_batch); + const auto record_batch_proxy_id = ProxyManager::manageProxy(record_batch_proxy); + + mda::ArrayFactory factory; + const auto record_batch_proxy_id_mda = factory.createScalar(record_batch_proxy_id); + + context.outputs[0] = record_batch_proxy_id_mda; } void Reader::getFilename(libmexclass::proxy::method::Context& context) { namespace mda = ::matlab::data; mda::ArrayFactory factory; - MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto filename_utf16, arrow::util::UTF8StringToUTF16(filename), context, error::UNICODE_CONVERSION_ERROR_ID); + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto filename_utf16, arrow::util::UTF8StringToUTF16(filename), context, error::UNICODE_CONVERSION_ERROR_ID); auto filename_utf16_mda = factory.createScalar(filename_utf16); context.outputs[0] = filename_utf16_mda; } From c922972c54898c640b7b2e471271f484ae121933 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Mon, 7 Aug 2023 12:09:14 -0400 Subject: [PATCH 06/11] 1. Add error message IDs. 2. Error if not Feather V1 file. --- .../arrow/matlab/io/feather/proxy/reader.cc | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc index 080b2da9418..36c4e801d86 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc @@ -52,21 +52,35 @@ namespace arrow::matlab::io::feather::proxy { using namespace libmexclass::proxy; using RecordBatchProxy = arrow::matlab::tabular::proxy::RecordBatch; + mda::ArrayFactory factory; + // Create a file input stream. - MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto source, arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool()), context, "test"); - // Create a Reader. - // TODO: Create an error ID. - // TODO: Error if Feather V2. - MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto reader, arrow::ipc::feather::Reader::Open(source), context, "test"); + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto source, arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool()), context, "arrow:io:feather:reader:FailedToOpenFile"); + + // Create a Reader from the file input stream. + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto reader, arrow::ipc::feather::Reader::Open(source), context, "arrow:io:feather:reader:FailedToCreateReader"); + + // Error if not Feather V1. + const auto version = reader->version(); + if (version == ipc::feather::kFeatherV2Version) { + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(Status::NotImplemented("Support for Feather V2 has not been implemented."), context, "arrow:io:feather:reader:FeatherVersion2"); + } else if (version != ipc::feather::kFeatherV1Version) { + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(Status::Invalid("Unknown Feather format version."), context, "arrow:io:feather:reader:UnknownFeatherVersion"); + } + + // Read a Table from the file. std::shared_ptr table = nullptr; - MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(reader->Read(&table), context, "test"); + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(reader->Read(&table), context, "arrow:io:feather:reader:FailedToReadTable"); + + // Get the first RecordBatch from the Table. arrow::TableBatchReader table_batch_reader{table}; std::shared_ptr record_batch = nullptr; - MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(table_batch_reader.ReadNext(&record_batch), context, "test"); + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(table_batch_reader.ReadNext(&record_batch), context, "arrow:io:feather:reader:FailedToReadRecordBatch"); + + // Create a Proxy from the first RecordBatch. auto record_batch_proxy = std::make_shared(record_batch); const auto record_batch_proxy_id = ProxyManager::manageProxy(record_batch_proxy); - mda::ArrayFactory factory; const auto record_batch_proxy_id_mda = factory.createScalar(record_batch_proxy_id); context.outputs[0] = record_batch_proxy_id_mda; From c7ecc10d2e2f60429edc8ed2e7b9ed38c5aa4c27 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Mon, 7 Aug 2023 15:38:51 -0400 Subject: [PATCH 07/11] Add static const variables for error message IDs. --- matlab/src/cpp/arrow/matlab/error/error.h | 6 ++++++ .../src/cpp/arrow/matlab/io/feather/proxy/reader.cc | 12 ++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/matlab/src/cpp/arrow/matlab/error/error.h b/matlab/src/cpp/arrow/matlab/error/error.h index e1d2982f282..deac5e26fc1 100644 --- a/matlab/src/cpp/arrow/matlab/error/error.h +++ b/matlab/src/cpp/arrow/matlab/error/error.h @@ -181,7 +181,13 @@ namespace arrow::matlab::error { static const char* RECORD_BATCH_NUMERIC_INDEX_WITH_EMPTY_RECORD_BATCH = "arrow:tabular:recordbatch:NumericIndexWithEmptyRecordBatch"; static const char* RECORD_BATCH_INVALID_NUMERIC_COLUMN_INDEX = "arrow:tabular:recordbatch:InvalidNumericColumnIndex"; static const char* FAILED_TO_OPEN_FILE_FOR_WRITE = "arrow:io:FailedToOpenFileForWrite"; + static const char* FAILED_TO_OPEN_FILE_FOR_READ = "arrow:io:FailedToOpenFileForRead"; static const char* FEATHER_FAILED_TO_WRITE_TABLE = "arrow:io:feather:FailedToWriteTable"; static const char* TABLE_FROM_RECORD_BATCH = "arrow:table:FromRecordBatch"; + static const char* FEATHER_FAILED_TO_CREATE_READER = "arrow:io:feather:FailedToCreateReader"; + static const char* FEATHER_VERSION_2 = "arrow:io:feather:FeatherVersion2"; + static const char* FEATHER_VERSION_UNKNOWN = "arrow:io:feather:FeatherVersionUnknown"; + static const char* FEATHER_FAILED_TO_READ_TABLE = "arrow:io:feather:FailedToReadTable"; + static const char* FEATHER_FAILED_TO_READ_RECORD_BATCH = "arrow:io:feather:FailedToReadRecordBatch"; } diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc index 36c4e801d86..a264d24ecb1 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc @@ -55,27 +55,27 @@ namespace arrow::matlab::io::feather::proxy { mda::ArrayFactory factory; // Create a file input stream. - MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto source, arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool()), context, "arrow:io:feather:reader:FailedToOpenFile"); + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto source, arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool()), context, error::FAILED_TO_OPEN_FILE_FOR_READ); // Create a Reader from the file input stream. - MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto reader, arrow::ipc::feather::Reader::Open(source), context, "arrow:io:feather:reader:FailedToCreateReader"); + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto reader, arrow::ipc::feather::Reader::Open(source), context, error::FEATHER_FAILED_TO_CREATE_READER); // Error if not Feather V1. const auto version = reader->version(); if (version == ipc::feather::kFeatherV2Version) { - MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(Status::NotImplemented("Support for Feather V2 has not been implemented."), context, "arrow:io:feather:reader:FeatherVersion2"); + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(Status::NotImplemented("Support for Feather V2 has not been implemented."), context, error::FEATHER_VERSION_2); } else if (version != ipc::feather::kFeatherV1Version) { - MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(Status::Invalid("Unknown Feather format version."), context, "arrow:io:feather:reader:UnknownFeatherVersion"); + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(Status::Invalid("Unknown Feather format version."), context, error::FEATHER_VERSION_UNKNOWN); } // Read a Table from the file. std::shared_ptr table = nullptr; - MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(reader->Read(&table), context, "arrow:io:feather:reader:FailedToReadTable"); + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(reader->Read(&table), context, error::FEATHER_FAILED_TO_READ_TABLE); // Get the first RecordBatch from the Table. arrow::TableBatchReader table_batch_reader{table}; std::shared_ptr record_batch = nullptr; - MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(table_batch_reader.ReadNext(&record_batch), context, "arrow:io:feather:reader:FailedToReadRecordBatch"); + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(table_batch_reader.ReadNext(&record_batch), context, error::FEATHER_FAILED_TO_READ_RECORD_BATCH); // Create a Proxy from the first RecordBatch. auto record_batch_proxy = std::make_shared(record_batch); From dbd3a312b05df3108dc90beac4dc177e15a2956a Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Mon, 7 Aug 2023 15:47:01 -0400 Subject: [PATCH 08/11] Rename C++ Proxy Feather Writer from FeatherWriter to Writer. --- .../proxy/{feather_writer.cc => writer.cc} | 16 ++++++++-------- .../feather/proxy/{feather_writer.h => writer.h} | 6 +++--- .../+arrow/+internal/+io/+feather/Writer.m | 4 ++-- .../tools/cmake/BuildMatlabArrowInterface.cmake | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) rename matlab/src/cpp/arrow/matlab/io/feather/proxy/{feather_writer.cc => writer.cc} (86%) rename matlab/src/cpp/arrow/matlab/io/feather/proxy/{feather_writer.h => writer.h} (89%) diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.cc similarity index 86% rename from matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc rename to matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.cc index a27e1fb0e62..c71c9ae7a51 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/matlab/io/feather/proxy/feather_writer.h" +#include "arrow/matlab/io/feather/proxy/writer.h" #include "arrow/matlab/tabular/proxy/record_batch.h" #include "arrow/matlab/error/error.h" @@ -30,12 +30,12 @@ namespace arrow::matlab::io::feather::proxy { - FeatherWriter::FeatherWriter(const std::string& filename) : filename{filename} { - REGISTER_METHOD(FeatherWriter, getFilename); - REGISTER_METHOD(FeatherWriter, write); + Writer::Writer(const std::string& filename) : filename{filename} { + REGISTER_METHOD(Writer, getFilename); + REGISTER_METHOD(Writer, write); } - libmexclass::proxy::MakeResult FeatherWriter::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + libmexclass::proxy::MakeResult Writer::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { namespace mda = ::matlab::data; mda::StructArray opts = constructor_arguments[0]; const mda::StringArray filename_mda = opts[0]["Filename"]; @@ -45,10 +45,10 @@ namespace arrow::matlab::io::feather::proxy { arrow::util::UTF16StringToUTF8(filename_utf16), error::UNICODE_CONVERSION_ERROR_ID); - return std::make_shared(filename_utf8); + return std::make_shared(filename_utf8); } - void FeatherWriter::getFilename(libmexclass::proxy::method::Context& context) { + void Writer::getFilename(libmexclass::proxy::method::Context& context) { namespace mda = ::matlab::data; MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto utf16_filename, arrow::util::UTF8StringToUTF16(filename), @@ -59,7 +59,7 @@ namespace arrow::matlab::io::feather::proxy { context.outputs[0] = str_mda; } - void FeatherWriter::write(libmexclass::proxy::method::Context& context) { + void Writer::write(libmexclass::proxy::method::Context& context) { namespace mda = ::matlab::data; mda::StructArray opts = context.inputs[0]; const mda::TypedArray record_batch_proxy_id_mda = opts[0]["RecordBatchProxyID"]; diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h b/matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.h similarity index 89% rename from matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h rename to matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.h index dadb4798878..21dc70f432a 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.h @@ -23,11 +23,11 @@ namespace arrow::matlab::io::feather::proxy { - class FeatherWriter : public libmexclass::proxy::Proxy { + class Writer : public libmexclass::proxy::Proxy { public: - FeatherWriter(const std::string& filename); + Writer(const std::string& filename); - ~FeatherWriter() {} + ~Writer() {} static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m index 470c41fd5b2..37c785f10a5 100644 --- a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m +++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m @@ -31,7 +31,7 @@ end args = struct(Filename=filename); - proxyName = "arrow.io.feather.proxy.FeatherWriter"; + proxyName = "arrow.io.feather.proxy.Writer"; obj.Proxy = arrow.internal.proxy.create(proxyName, args); end @@ -45,4 +45,4 @@ function write(obj, T) filename = obj.Proxy.getFilename(); end end -end \ No newline at end of file +end diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index 42b63afb386..c19740f1814 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -56,7 +56,7 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/field.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/wrap.cc" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/writer.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/reader.cc") From 96e738d254330d5e7fe6ef4daea8dfb1bd3cebb2 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Mon, 7 Aug 2023 15:49:07 -0400 Subject: [PATCH 09/11] Update factory to include Writer instead of FeatherWriter. --- matlab/src/cpp/arrow/matlab/proxy/factory.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 92cce491a83..bce875bb9f1 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -25,7 +25,7 @@ #include "arrow/matlab/type/proxy/string_type.h" #include "arrow/matlab/type/proxy/timestamp_type.h" #include "arrow/matlab/type/proxy/field.h" -#include "arrow/matlab/io/feather/proxy/feather_writer.h" +#include "arrow/matlab/io/feather/proxy/writer.h" #include "arrow/matlab/io/feather/proxy/reader.h" #include "factory.h" @@ -62,7 +62,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name, REGISTER_PROXY(arrow.type.proxy.BooleanType , arrow::matlab::type::proxy::PrimitiveCType); REGISTER_PROXY(arrow.type.proxy.StringType , arrow::matlab::type::proxy::StringType); REGISTER_PROXY(arrow.type.proxy.TimestampType , arrow::matlab::type::proxy::TimestampType); - REGISTER_PROXY(arrow.io.feather.proxy.FeatherWriter , arrow::matlab::io::feather::proxy::FeatherWriter); + REGISTER_PROXY(arrow.io.feather.proxy.Writer , arrow::matlab::io::feather::proxy::Writer); REGISTER_PROXY(arrow.io.feather.proxy.Reader , arrow::matlab::io::feather::proxy::Reader); return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name}; From 09e92079e6ec3325c2be6b83829c29de753ff725 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Mon, 7 Aug 2023 15:55:30 -0400 Subject: [PATCH 10/11] Add Feather Reader code to test file tRoundTrip.m. --- matlab/test/arrow/io/feather/tRoundTrip.m | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/matlab/test/arrow/io/feather/tRoundTrip.m b/matlab/test/arrow/io/feather/tRoundTrip.m index d56152be6d1..e735d196c18 100644 --- a/matlab/test/arrow/io/feather/tRoundTrip.m +++ b/matlab/test/arrow/io/feather/tRoundTrip.m @@ -49,4 +49,9 @@ function Basic(testCase) function featherwrite(T, filename) writer = arrow.internal.io.feather.Writer(filename); writer.write(T); +end + +function T = featherread(filename) + reader = arrow.internal.io.feather.Reader(filename); + T = reader.read(); end \ No newline at end of file From 666d631aa4d1fdf1630b01b792da5a8bca34e0da Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Mon, 7 Aug 2023 16:12:45 -0400 Subject: [PATCH 11/11] Use MATLAB arguments block syntax for validating filename passed to Reader object. --- matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m b/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m index 80ba1ca5552..80da7294d2d 100644 --- a/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m +++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m @@ -28,10 +28,10 @@ methods function obj = Reader(filename) - filename = convertCharsToStrings(filename); - if ~(isstring(filename) && isscalar(filename)) - error("arrow:io:feather:FilenameUnsupportedType", "Filename must be a scalar string or char row vector."); + arguments + filename(1, 1) {mustBeNonmissing, mustBeNonzeroLengthText} end + args = struct(Filename=filename); obj.Proxy = arrow.internal.proxy.create("arrow.io.feather.proxy.Reader", args); end @@ -46,7 +46,7 @@ function filename = get.Filename(obj) filename = obj.Proxy.getFilename(); end - + end end