diff --git a/matlab/src/cpp/arrow/matlab/error/error.h b/matlab/src/cpp/arrow/matlab/error/error.h index e1d2982f282..deac5e26fc1 100644 --- a/matlab/src/cpp/arrow/matlab/error/error.h +++ b/matlab/src/cpp/arrow/matlab/error/error.h @@ -181,7 +181,13 @@ namespace arrow::matlab::error { static const char* RECORD_BATCH_NUMERIC_INDEX_WITH_EMPTY_RECORD_BATCH = "arrow:tabular:recordbatch:NumericIndexWithEmptyRecordBatch"; static const char* RECORD_BATCH_INVALID_NUMERIC_COLUMN_INDEX = "arrow:tabular:recordbatch:InvalidNumericColumnIndex"; static const char* FAILED_TO_OPEN_FILE_FOR_WRITE = "arrow:io:FailedToOpenFileForWrite"; + static const char* FAILED_TO_OPEN_FILE_FOR_READ = "arrow:io:FailedToOpenFileForRead"; static const char* FEATHER_FAILED_TO_WRITE_TABLE = "arrow:io:feather:FailedToWriteTable"; static const char* TABLE_FROM_RECORD_BATCH = "arrow:table:FromRecordBatch"; + static const char* FEATHER_FAILED_TO_CREATE_READER = "arrow:io:feather:FailedToCreateReader"; + static const char* FEATHER_VERSION_2 = "arrow:io:feather:FeatherVersion2"; + static const char* FEATHER_VERSION_UNKNOWN = "arrow:io:feather:FeatherVersionUnknown"; + static const char* FEATHER_FAILED_TO_READ_TABLE = "arrow:io:feather:FailedToReadTable"; + static const char* FEATHER_FAILED_TO_READ_RECORD_BATCH = "arrow:io:feather:FailedToReadRecordBatch"; }
diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc
new file mode 100644
index 00000000000..a264d24ecb1
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc
@@ -0,0 +1,122 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "libmexclass/proxy/ProxyManager.h"
+
+#include "arrow/matlab/error/error.h"
+#include "arrow/matlab/io/feather/proxy/reader.h"
+#include "arrow/matlab/tabular/proxy/record_batch.h"
+
+#include "arrow/util/utf8.h"
+
+#include "arrow/result.h"
+
+#include "arrow/io/file.h"
+#include "arrow/ipc/feather.h"
+#include "arrow/table.h"
+
+namespace arrow::matlab::io::feather::proxy {
+
+    // Stores the UTF-8 filename and registers the read and getFilename
+    // methods via REGISTER_METHOD.
+    Reader::Reader(const std::string& filename) : filename{filename} {
+        REGISTER_METHOD(Reader, read);
+        REGISTER_METHOD(Reader, getFilename);
+    }
+
+    // Creates a Reader proxy from the constructor arguments. The first
+    // argument is a struct whose "Filename" field holds a string array; its
+    // first element is converted from UTF-16 to UTF-8 before being stored.
+    // A failed conversion is passed on with UNICODE_CONVERSION_ERROR_ID.
+    libmexclass::proxy::MakeResult Reader::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
+        namespace mda = ::matlab::data;
+        using ReaderProxy = arrow::matlab::io::feather::proxy::Reader;
+
+        mda::StructArray args = constructor_arguments[0];
+        const mda::StringArray filename_utf16_mda = args[0]["Filename"];
+        const auto filename_utf16 = std::u16string(filename_utf16_mda[0]);
+        MATLAB_ASSIGN_OR_ERROR(const auto filename, arrow::util::UTF16StringToUTF8(filename_utf16), error::UNICODE_CONVERSION_ERROR_ID);
+
+        return std::make_shared<ReaderProxy>(filename);
+    }
+
+    // Reads the Feather V1 file named by `filename` and stores, in
+    // context.outputs[0], the ID of a RecordBatch proxy wrapping the first
+    // RecordBatch of the file's Table. Every failure is handed to `context`
+    // with its own error ID: the file cannot be opened, the Feather reader
+    // cannot be created, the file is Feather V2 or an unknown version, the
+    // Table cannot be read, or no RecordBatch can be obtained from the Table.
+    void Reader::read(libmexclass::proxy::method::Context& context) {
+        namespace mda = ::matlab::data;
+        using namespace libmexclass::proxy;
+        using RecordBatchProxy = arrow::matlab::tabular::proxy::RecordBatch;
+
+        mda::ArrayFactory factory;
+
+        // Create a file input stream.
+        MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto source, arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool()), context, error::FAILED_TO_OPEN_FILE_FOR_READ);
+
+        // Create a Reader from the file input stream.
+        MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto reader, arrow::ipc::feather::Reader::Open(source), context, error::FEATHER_FAILED_TO_CREATE_READER);
+
+        // Error if not Feather V1.
+        const auto version = reader->version();
+        if (version == ipc::feather::kFeatherV2Version) {
+            MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(Status::NotImplemented("Support for Feather V2 has not been implemented."), context, error::FEATHER_VERSION_2);
+        } else if (version != ipc::feather::kFeatherV1Version) {
+            MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(Status::Invalid("Unknown Feather format version."), context, error::FEATHER_VERSION_UNKNOWN);
+        }
+
+        // Read a Table from the file.
+        std::shared_ptr<arrow::Table> table = nullptr;
+        MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(reader->Read(&table), context, error::FEATHER_FAILED_TO_READ_TABLE);
+
+        // Get the first RecordBatch from the Table.
+        arrow::TableBatchReader table_batch_reader{table};
+        std::shared_ptr<arrow::RecordBatch> record_batch = nullptr;
+        MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(table_batch_reader.ReadNext(&record_batch), context, error::FEATHER_FAILED_TO_READ_RECORD_BATCH);
+
+        // ReadNext signals the end of the stream by succeeding with a null
+        // RecordBatch, which can be the very first result if the Table is
+        // empty. Raise that the same way as the version errors above instead
+        // of wrapping a null pointer in a proxy.
+        if (record_batch == nullptr) {
+            MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(Status::Invalid("The Feather file does not contain a RecordBatch to read."), context, error::FEATHER_FAILED_TO_READ_RECORD_BATCH);
+        }
+
+        // Create a Proxy from the first RecordBatch.
+        auto record_batch_proxy = std::make_shared<RecordBatchProxy>(record_batch);
+        const auto record_batch_proxy_id = ProxyManager::manageProxy(record_batch_proxy);
+
+        const auto record_batch_proxy_id_mda = factory.createScalar(record_batch_proxy_id);
+
+        context.outputs[0] = record_batch_proxy_id_mda;
+    }
+
+    // Stores the filename, converted from UTF-8 to UTF-16, in
+    // context.outputs[0]. A failed conversion is handed to `context` with
+    // UNICODE_CONVERSION_ERROR_ID.
+    void Reader::getFilename(libmexclass::proxy::method::Context& context) {
+        namespace mda = ::matlab::data;
+        mda::ArrayFactory factory;
+
+        MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto filename_utf16, arrow::util::UTF8StringToUTF16(filename), context, error::UNICODE_CONVERSION_ERROR_ID);
+        auto filename_utf16_mda = factory.createScalar(filename_utf16);
+        context.outputs[0] = filename_utf16_mda;
+    }
+
+}
diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h
new file mode 100644
index 00000000000..fb6c06de863
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+
+#include "libmexclass/proxy/Proxy.h"
+
+namespace arrow::matlab::io::feather::proxy {
+
+    // Proxy for reading the Feather file identified by `filename`. Declares
+    // the `read` and `getFilename` proxy methods defined in reader.cc.
+    class Reader : public libmexclass::proxy::Proxy {
+        public:
+            Reader(const std::string& filename);
+
+            virtual ~Reader() {}
+
+            // Factory that creates a Reader from constructor arguments.
+            static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments);
+
+        protected:
+            // Proxy methods; each receives the invoking method Context.
+            void read(libmexclass::proxy::method::Context& context);
+            void getFilename(libmexclass::proxy::method::Context& context);
+
+            // Name of the Feather file to read.
+            const std::string filename;
+    };
+
+}
diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.cc similarity index 86% rename from matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc rename to matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.cc index a27e1fb0e62..c71c9ae7a51 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/matlab/io/feather/proxy/feather_writer.h" +#include "arrow/matlab/io/feather/proxy/writer.h" #include "arrow/matlab/tabular/proxy/record_batch.h" #include "arrow/matlab/error/error.h" @@ -30,12 +30,12 @@ namespace arrow::matlab::io::feather::proxy { - FeatherWriter::FeatherWriter(const std::string& filename) : filename{filename} { - REGISTER_METHOD(FeatherWriter, getFilename); - REGISTER_METHOD(FeatherWriter, write); + Writer::Writer(const std::string& filename) : filename{filename} { + REGISTER_METHOD(Writer, getFilename); + REGISTER_METHOD(Writer, write); } - libmexclass::proxy::MakeResult FeatherWriter::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + libmexclass::proxy::MakeResult Writer::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { namespace mda = ::matlab::data; mda::StructArray opts = constructor_arguments[0]; const mda::StringArray filename_mda = opts[0]["Filename"]; @@ -45,10 +45,10 @@ namespace arrow::matlab::io::feather::proxy { arrow::util::UTF16StringToUTF8(filename_utf16), error::UNICODE_CONVERSION_ERROR_ID); - return std::make_shared(filename_utf8); + return std::make_shared(filename_utf8); } - void FeatherWriter::getFilename(libmexclass::proxy::method::Context& context) { + void Writer::getFilename(libmexclass::proxy::method::Context& context) { namespace mda = ::matlab::data; MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto utf16_filename, arrow::util::UTF8StringToUTF16(filename), @@ -59,7 +59,7 @@ namespace arrow::matlab::io::feather::proxy { context.outputs[0] = str_mda; } - void FeatherWriter::write(libmexclass::proxy::method::Context& context) { + void Writer::write(libmexclass::proxy::method::Context& context) { namespace mda = ::matlab::data; mda::StructArray opts = context.inputs[0]; const mda::TypedArray record_batch_proxy_id_mda = opts[0]["RecordBatchProxyID"]; diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h 
b/matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.h similarity index 89% rename from matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h rename to matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.h index dadb4798878..21dc70f432a 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.h @@ -23,11 +23,11 @@ namespace arrow::matlab::io::feather::proxy { - class FeatherWriter : public libmexclass::proxy::Proxy { + class Writer : public libmexclass::proxy::Proxy { public: - FeatherWriter(const std::string& filename); + Writer(const std::string& filename); - ~FeatherWriter() {} + ~Writer() {} static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 7a2a4f3192f..bce875bb9f1 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -25,7 +25,8 @@ #include "arrow/matlab/type/proxy/string_type.h" #include "arrow/matlab/type/proxy/timestamp_type.h" #include "arrow/matlab/type/proxy/field.h" -#include "arrow/matlab/io/feather/proxy/feather_writer.h" +#include "arrow/matlab/io/feather/proxy/writer.h" +#include "arrow/matlab/io/feather/proxy/reader.h" #include "factory.h" @@ -61,7 +62,8 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name, REGISTER_PROXY(arrow.type.proxy.BooleanType , arrow::matlab::type::proxy::PrimitiveCType); REGISTER_PROXY(arrow.type.proxy.StringType , arrow::matlab::type::proxy::StringType); REGISTER_PROXY(arrow.type.proxy.TimestampType , arrow::matlab::type::proxy::TimestampType); - REGISTER_PROXY(arrow.io.feather.proxy.FeatherWriter , arrow::matlab::io::feather::proxy::FeatherWriter); + REGISTER_PROXY(arrow.io.feather.proxy.Writer , arrow::matlab::io::feather::proxy::Writer); + 
REGISTER_PROXY(arrow.io.feather.proxy.Reader , arrow::matlab::io::feather::proxy::Reader); return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name}; };
diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m b/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m
new file mode 100644
index 00000000000..80da7294d2d
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m
@@ -0,0 +1,52 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements.  See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License.  You may obtain a copy of the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied.  See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef Reader
+%READER An internal Reader object for reading Feather files. READ returns the file's contents as converted by RecordBatch.toMATLAB.
+
+    properties (GetAccess=public, SetAccess=private, Hidden)
+        Proxy % Proxy created by arrow.internal.proxy.create for "arrow.io.feather.proxy.Reader".
+    end
+
+    properties (Dependent, SetAccess=private, GetAccess=public)
+        % Name of the file to read. Dependent: fetched from the proxy (getFilename) on every access.
+        Filename
+    end
+
+    methods
+
+        function obj = Reader(filename) % Creates the underlying proxy for the given filename.
+            arguments
+                filename(1, 1) {mustBeNonmissing, mustBeNonzeroLengthText} % Scalar, nonmissing, nonzero-length text. NOTE(review): no class is declared, so a char vector longer than one character presumably fails the (1, 1) size check - confirm string-only input is intended.
+            end
+
+            args = struct(Filename=filename); % The proxy constructor receives the filename as a struct field.
+            obj.Proxy = arrow.internal.proxy.create("arrow.io.feather.proxy.Reader", args);
+        end
+
+        function T = read(obj) % Reads the file through the proxy and returns the result of RecordBatch.toMATLAB.
+            recordBatchProxyID = obj.Proxy.read(); % The proxy's read returns the ID of a RecordBatch proxy.
+            proxy = libmexclass.proxy.Proxy(Name="arrow.tabular.proxy.RecordBatch", ID=recordBatchProxyID); % Refer to that proxy by ID.
+            recordBatch = arrow.tabular.RecordBatch(proxy);
+            T = recordBatch.toMATLAB();
+        end
+
+        function filename = get.Filename(obj) % Property getter: forwards to the proxy's getFilename.
+            filename = obj.Proxy.getFilename();
+        end
+
+    end
+
+end
diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m index 470c41fd5b2..37c785f10a5 100644 --- a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m +++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m @@ -31,7 +31,7 @@ end args = struct(Filename=filename); - proxyName = "arrow.io.feather.proxy.FeatherWriter"; + proxyName = "arrow.io.feather.proxy.Writer"; obj.Proxy = arrow.internal.proxy.create(proxyName, args); end @@ -45,4 +45,4 @@ function write(obj, T) filename = obj.Proxy.getFilename(); end end -end \ No newline at end of file +end diff --git a/matlab/test/arrow/io/feather/tRoundTrip.m b/matlab/test/arrow/io/feather/tRoundTrip.m index d56152be6d1..e735d196c18 100644 --- a/matlab/test/arrow/io/feather/tRoundTrip.m +++ b/matlab/test/arrow/io/feather/tRoundTrip.m @@ -49,4 +49,9 @@ function Basic(testCase) function featherwrite(T, filename) writer = arrow.internal.io.feather.Writer(filename); writer.write(T); +end + +function T = featherread(filename) + reader = arrow.internal.io.feather.Reader(filename); + T = reader.read(); end \ No newline at end of file diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index 1d579994176..c19740f1814 100644 --- 
a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -56,8 +56,8 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/field.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/wrap.cc" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc") - + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/writer.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/reader.cc") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy")