From 85b78c3b0c7fcf305b41206865b1a7f80d9c9bc9 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Fri, 4 Aug 2023 15:31:15 -0400 Subject: [PATCH 01/17] Add stub files for feather_writer.cc and feather_writer.h --- .../matlab/io/feather/proxy/feather_writer.cc | 26 ++++++++++++++ .../matlab/io/feather/proxy/feather_writer.h | 34 +++++++++++++++++++ .../cmake/BuildMatlabArrowInterface.cmake | 7 ++-- 3 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc create mode 100644 matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc new file mode 100644 index 00000000000..9ef28d6e5be --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/matlab/io/feather/proxy/feather_writer.h" + +namespace arrow::matlab::io::feather::proxy { + + + libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + return libmexclass::error::Error{"arrow:NotImplemented", "Not implemented"}; + } +} diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h new file mode 100644 index 00000000000..edd0c55895d --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "arrow/ipc/feather.h" + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::io::feather::proxy { + + class FeatherWriter : public libmexclass::proxy::Proxy { + public: + FeatherWriter() {} + + virtual ~FeatherWriter() {} + + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); + }; +} diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index f4696cfad26..ec19bf78f9a 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -39,7 +39,8 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/c "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/error" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer") + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/boolean_array.cc" @@ -55,7 +56,9 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/string_type.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/field.cc" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/wrap.cc") + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/wrap.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc") + set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy") From 56beb41feea6f124fe8e9c90652b523563d75b7f Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Fri, 4 Aug 2023 
15:56:01 -0400 Subject: [PATCH 02/17] Add FeatherWriter::make --- .../matlab/io/feather/proxy/feather_writer.cc | 16 +++++++++++++++- .../matlab/io/feather/proxy/feather_writer.h | 11 +++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc index 9ef28d6e5be..9c6823f9244 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc @@ -16,11 +16,25 @@ // under the License. #include "arrow/matlab/io/feather/proxy/feather_writer.h" +#include "arrow/matlab/error/error.h" + +#include "arrow/result.h" +#include "arrow/util/utf8.h" namespace arrow::matlab::io::feather::proxy { + FeatherWriter::FeatherWriter(const std::string& filename) : filename{filename} {} + + libmexclass::proxy::MakeResult FeatherWriter::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + namespace mda = ::matlab::data; + mda::StructArray opts = constructor_arguments[0]; + const mda::StringArray filename_mda = opts[0]["Filename"]; - libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + const auto filename_utf16 = std::u16string(filename_mda[0]); + MATLAB_ASSIGN_OR_ERROR(const auto column_name_utf8, + arrow::util::UTF16StringToUTF8(filename_utf16), + error::UNICODE_CONVERSION_ERROR_ID); + return libmexclass::error::Error{"arrow:NotImplemented", "Not implemented"}; } } diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h index edd0c55895d..75edd8b47bb 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h @@ -17,7 +17,7 @@ #pragma once -#include "arrow/ipc/feather.h" +#include "arrow/status.h" #include "libmexclass/proxy/Proxy.h" 
@@ -25,10 +25,13 @@ namespace arrow::matlab::io::feather::proxy { class FeatherWriter : public libmexclass::proxy::Proxy { public: - FeatherWriter() {} + FeatherWriter(const std::string& filename); - virtual ~FeatherWriter() {} + ~FeatherWriter() {} - static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); + + private: + std::string filename; }; } From 7fc1906425a0764e9df86aad5443ff1ec575fef6 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Fri, 4 Aug 2023 16:00:57 -0400 Subject: [PATCH 03/17] Add arrow.internal.io.feather.Writer class --- .../+arrow/+internal/+io/+feather/Writer.m | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m new file mode 100644 index 00000000000..c0d513425c9 --- /dev/null +++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m @@ -0,0 +1,37 @@ +%WRITER Class for writing feather V1 files. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. 
See the License for the specific language governing +% permissions and limitations under the License. +classdef Writer < matlab.mixin.Scalar + %UNTITLED2 Summary of this class goes here + % Detailed explanation goes here + + properties(Hidden, SetAccess=private, GetAccess=public) + Proxy + end + + methods + function obj = Writer(filename) + arguments + filename(1, 1) {mustBeNonmissing, mustBeNonzeroLengthText} + end + + args = struct(Filename=filename); + proxyName = "arrow.io.feather.proxy.FeatherWriter"; + obj.Proxy = libmexclass.proxy.Proxy(Name=proxyName, ... + ConstructorArguments=args); + end + end +end \ No newline at end of file From 72038d378b885f96d2a894fcac6e1f17d487c6a6 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Fri, 4 Aug 2023 16:02:31 -0400 Subject: [PATCH 04/17] Register FeatherWriter proxy --- matlab/src/cpp/arrow/matlab/proxy/factory.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 7d18c6c6b62..7a2a4f3192f 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -25,6 +25,7 @@ #include "arrow/matlab/type/proxy/string_type.h" #include "arrow/matlab/type/proxy/timestamp_type.h" #include "arrow/matlab/type/proxy/field.h" +#include "arrow/matlab/io/feather/proxy/feather_writer.h" #include "factory.h" @@ -60,6 +61,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name, REGISTER_PROXY(arrow.type.proxy.BooleanType , arrow::matlab::type::proxy::PrimitiveCType); REGISTER_PROXY(arrow.type.proxy.StringType , arrow::matlab::type::proxy::StringType); REGISTER_PROXY(arrow.type.proxy.TimestampType , arrow::matlab::type::proxy::TimestampType); + REGISTER_PROXY(arrow.io.feather.proxy.FeatherWriter , arrow::matlab::io::feather::proxy::FeatherWriter); return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name}; }; From 
9a5cd5e28c1d3e241590555d637d077859f01493 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Fri, 4 Aug 2023 16:09:55 -0400 Subject: [PATCH 05/17] Add Filename property to Writer --- .../matlab/io/feather/proxy/feather_writer.cc | 20 ++++++++++++++++--- .../matlab/io/feather/proxy/feather_writer.h | 3 +++ .../+arrow/+internal/+io/+feather/Writer.m | 10 +++++++++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc index 9c6823f9244..ae8d12afce7 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc @@ -23,7 +23,9 @@ namespace arrow::matlab::io::feather::proxy { - FeatherWriter::FeatherWriter(const std::string& filename) : filename{filename} {} + FeatherWriter::FeatherWriter(const std::string& filename) : filename{filename} { + REGISTER_METHOD(FeatherWriter, getFilename); + } libmexclass::proxy::MakeResult FeatherWriter::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { namespace mda = ::matlab::data; @@ -31,10 +33,22 @@ namespace arrow::matlab::io::feather::proxy { const mda::StringArray filename_mda = opts[0]["Filename"]; const auto filename_utf16 = std::u16string(filename_mda[0]); - MATLAB_ASSIGN_OR_ERROR(const auto column_name_utf8, + MATLAB_ASSIGN_OR_ERROR(const auto filename_utf8, arrow::util::UTF16StringToUTF8(filename_utf16), error::UNICODE_CONVERSION_ERROR_ID); - return libmexclass::error::Error{"arrow:NotImplemented", "Not implemented"}; + return std::make_shared<FeatherWriter>(filename_utf8); } + + void FeatherWriter::getFilename(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto utf16_filename, + arrow::util::UTF8StringToUTF16(filename), + context, + error::UNICODE_CONVERSION_ERROR_ID); + mda::ArrayFactory factory; + auto str_mda = 
factory.createScalar(utf16_filename); + context.outputs[0] = str_mda; + } + } diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h index 75edd8b47bb..9a583ed6a76 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h @@ -30,6 +30,9 @@ namespace arrow::matlab::io::feather::proxy { ~FeatherWriter() {} static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + void getFilename(libmexclass::proxy::method::Context& context); private: std::string filename; diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m index c0d513425c9..4fc9f0e1e02 100644 --- a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m +++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m @@ -22,6 +22,10 @@ Proxy end + properties(Dependent) + Filename + end + methods function obj = Writer(filename) arguments @@ -31,7 +35,11 @@ args = struct(Filename=filename); proxyName = "arrow.io.feather.proxy.FeatherWriter"; obj.Proxy = libmexclass.proxy.Proxy(Name=proxyName, ... 
- ConstructorArguments=args); + ConstructorArguments={args}); + end + + function filename = get.Filename(obj) + filename = obj.Proxy.getFilename(); end end end \ No newline at end of file From 74dc904af9eb64fdb3c6ecbbb44d3d164f20aa05 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Fri, 4 Aug 2023 16:17:01 -0400 Subject: [PATCH 06/17] Make Proxy property on RecordBatch get-able, but not set-able --- matlab/src/matlab/+arrow/+tabular/RecordBatch.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m index 0d002797f01..be5eee7d89c 100644 --- a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m +++ b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m @@ -23,7 +23,7 @@ ColumnNames end - properties (Access=protected) + properties (Hidden, SetAccess=private, GetAccess=public) Proxy end From 0bc1ae1ce0194d6d6e67324f58ace00cb971efd4 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Fri, 4 Aug 2023 16:20:11 -0400 Subject: [PATCH 07/17] Add write method to MATLAB Writer --- matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m index 4fc9f0e1e02..31f25b56ebf 100644 --- a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m +++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m @@ -38,6 +38,12 @@ ConstructorArguments={args}); end + function write(obj, T) + rb = arrow.recordbatch(T); + args = struct(RecordBatchProxyID=rb.Proxy.ID); + obj.Proxy.writeRecordBatch(args); + end + function filename = get.Filename(obj) filename = obj.Proxy.getFilename(); end From 6d8bbeffb81da8fce6c0f5745be56c70f1f6a808 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Fri, 4 Aug 2023 16:28:02 -0400 Subject: [PATCH 08/17] Add unwrap to RecordBatch proxy --- 
matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc | 4 ++++ matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc index ed30472f6c4..e159e926ec5 100644 --- a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc +++ b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc @@ -56,6 +56,10 @@ namespace arrow::matlab::tabular::proxy { REGISTER_METHOD(RecordBatch, getColumnByIndex); } + std::shared_ptr<arrow::RecordBatch> RecordBatch::unwrap() { + return record_batch; + } + void RecordBatch::toString(libmexclass::proxy::method::Context& context) { namespace mda = ::matlab::data; MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto utf16_string, arrow::util::UTF8StringToUTF16(record_batch->ToString()), context, error::UNICODE_CONVERSION_ERROR_ID); diff --git a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h index b5d741060a1..b8c038816b3 100644 --- a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h +++ b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h @@ -29,6 +29,8 @@ namespace arrow::matlab::tabular::proxy { virtual ~RecordBatch() {} + std::shared_ptr<arrow::RecordBatch> unwrap(); + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); protected: From 3c4f75a406fa5cfc281c697cc213543e83a8a908 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Fri, 4 Aug 2023 16:30:02 -0400 Subject: [PATCH 09/17] Add extracting record batch from ID to writeRecordBatch C++ method --- .../matlab/io/feather/proxy/feather_writer.cc | 16 ++++++++++++++++ .../matlab/io/feather/proxy/feather_writer.h | 1 + 2 files changed, 17 insertions(+) diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc index ae8d12afce7..e220976ad66 
100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc @@ -16,15 +16,19 @@ // under the License. #include "arrow/matlab/io/feather/proxy/feather_writer.h" +#include "arrow/matlab/tabular/proxy/record_batch.h" #include "arrow/matlab/error/error.h" #include "arrow/result.h" #include "arrow/util/utf8.h" +#include "libmexclass/proxy/ProxyManager.h" + namespace arrow::matlab::io::feather::proxy { FeatherWriter::FeatherWriter(const std::string& filename) : filename{filename} { REGISTER_METHOD(FeatherWriter, getFilename); + REGISTER_METHOD(FeatherWriter, writeRecordBatch); } libmexclass::proxy::MakeResult FeatherWriter::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { @@ -51,4 +55,16 @@ namespace arrow::matlab::io::feather::proxy { context.outputs[0] = str_mda; } + void FeatherWriter::writeRecordBatch(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::StructArray opts = context.inputs[0]; + const mda::TypedArray record_batch_proxy_id_mda = opts[0]["RecordBatchProxyID"]; + const uint64_t record_batch_proxy_id = record_batch_proxy_id_mda[0]; + + auto proxy = libmexclass::proxy::ProxyManager::getProxy(record_batch_proxy_id); + auto record_batch_proxy = std::static_pointer_cast(proxy); + auto record_batch = record_batch_proxy->unwrap(); + } + + } diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h index 9a583ed6a76..ae1223ebc2b 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h @@ -33,6 +33,7 @@ namespace arrow::matlab::io::feather::proxy { protected: void getFilename(libmexclass::proxy::method::Context& context); + void writeRecordBatch(libmexclass::proxy::method::Context& context); private: std::string filename; From 
094124b6a6323b4355f83b861767dbfe38e4d1f8 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Fri, 4 Aug 2023 16:39:11 -0400 Subject: [PATCH 10/17] Add opening file for write --- .../arrow/matlab/io/feather/proxy/feather_writer.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc index e220976ad66..d0cfacc2e36 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc @@ -22,6 +22,9 @@ #include "arrow/result.h" #include "arrow/util/utf8.h" +#include "arrow/io/file.h" +#include "arrow/ipc/feather.h" + #include "libmexclass/proxy/ProxyManager.h" namespace arrow::matlab::io::feather::proxy { @@ -64,6 +67,13 @@ namespace arrow::matlab::io::feather::proxy { auto proxy = libmexclass::proxy::ProxyManager::getProxy(record_batch_proxy_id); auto record_batch_proxy = std::static_pointer_cast(proxy); auto record_batch = record_batch_proxy->unwrap(); + + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(std::shared_ptr output_stream, + arrow::io::FileOutputStream::Open(filename), + context, + "arrow:io:feather:FailedToOpenFileForWrite"); + + } From 665f5aa6c2436590df3e8e03e12f173a68441a4e Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Fri, 4 Aug 2023 16:52:05 -0400 Subject: [PATCH 11/17] Convert record batch to table and call WriteTable --- .../matlab/io/feather/proxy/feather_writer.cc | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc index d0cfacc2e36..03ccce4a08b 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc @@ -20,6 +20,7 @@ #include "arrow/matlab/error/error.h" #include "arrow/result.h" +#include 
"arrow/table.h" #include "arrow/util/utf8.h" #include "arrow/io/file.h" @@ -68,13 +69,24 @@ namespace arrow::matlab::io::feather::proxy { auto record_batch_proxy = std::static_pointer_cast(proxy); auto record_batch = record_batch_proxy->unwrap(); + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto table, + arrow::Table::FromRecordBatches({record_batch}), + context, + "arrow:io:feather:FailedToCreateTableFromRecordBatch"); + + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(std::shared_ptr output_stream, arrow::io::FileOutputStream::Open(filename), context, "arrow:io:feather:FailedToOpenFileForWrite"); + // Specify the feather file format version as V1 + arrow::ipc::feather::WriteProperties write_props; + write_props.version = arrow::ipc::feather::kFeatherV1Version; + // Write the Feather file metadata to the end of the file. + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(ipc::feather::WriteTable(*table, output_stream.get(), write_props), + context, + "arrow:io:feather:FailedToWriteTable"); } - - } From d7b7d4459e0670b0599dfbbfc054fe470136b908 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Fri, 4 Aug 2023 16:53:24 -0400 Subject: [PATCH 12/17] Remove stub comments --- matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m | 2 -- 1 file changed, 2 deletions(-) diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m index 31f25b56ebf..2710e9e3d91 100644 --- a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m +++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m @@ -15,8 +15,6 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. 
classdef Writer < matlab.mixin.Scalar - %UNTITLED2 Summary of this class goes here - % Detailed explanation goes here properties(Hidden, SetAccess=private, GetAccess=public) Proxy From 6ca5aff10cababfa650435758192cb35150d4bf7 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Fri, 4 Aug 2023 17:00:18 -0400 Subject: [PATCH 13/17] Add error id static const variable --- matlab/src/cpp/arrow/matlab/error/error.h | 4 ++++ .../cpp/arrow/matlab/io/feather/proxy/feather_writer.cc | 7 +++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/matlab/src/cpp/arrow/matlab/error/error.h b/matlab/src/cpp/arrow/matlab/error/error.h index b7c0d7d696d..e1d2982f282 100644 --- a/matlab/src/cpp/arrow/matlab/error/error.h +++ b/matlab/src/cpp/arrow/matlab/error/error.h @@ -180,4 +180,8 @@ namespace arrow::matlab::error { static const char* UNKNOWN_PROXY_FOR_ARRAY_TYPE = "arrow:array:UnknownProxyForArrayType"; static const char* RECORD_BATCH_NUMERIC_INDEX_WITH_EMPTY_RECORD_BATCH = "arrow:tabular:recordbatch:NumericIndexWithEmptyRecordBatch"; static const char* RECORD_BATCH_INVALID_NUMERIC_COLUMN_INDEX = "arrow:tabular:recordbatch:InvalidNumericColumnIndex"; + static const char* FAILED_TO_OPEN_FILE_FOR_WRITE = "arrow:io:FailedToOpenFileForWrite"; + static const char* FEATHER_FAILED_TO_WRITE_TABLE = "arrow:io:feather:FailedToWriteTable"; + static const char* TABLE_FROM_RECORD_BATCH = "arrow:table:FromRecordBatch"; + } diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc index 03ccce4a08b..c2650f8c6f8 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc @@ -72,13 +72,12 @@ namespace arrow::matlab::io::feather::proxy { MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto table, arrow::Table::FromRecordBatches({record_batch}), context, - "arrow:io:feather:FailedToCreateTableFromRecordBatch"); - + 
error::TABLE_FROM_RECORD_BATCH); MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(std::shared_ptr output_stream, arrow::io::FileOutputStream::Open(filename), context, - "arrow:io:feather:FailedToOpenFileForWrite"); + error::FAILED_TO_OPEN_FILE_FOR_WRITE); // Specify the feather file format version as V1 arrow::ipc::feather::WriteProperties write_props; @@ -87,6 +86,6 @@ namespace arrow::matlab::io::feather::proxy { // Write the Feather file metadata to the end of the file. MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(ipc::feather::WriteTable(*table, output_stream.get(), write_props), context, - "arrow:io:feather:FailedToWriteTable"); + error::FEATHER_FAILED_TO_WRITE_TABLE); } } From 6073fc827e80bd12e2a1cce5e9bd2ac266c59d7f Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Mon, 7 Aug 2023 11:59:58 -0400 Subject: [PATCH 14/17] Add basic test for writing feather files --- matlab/test/arrow/io/feather/tRoundTrip.m | 53 +++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 matlab/test/arrow/io/feather/tRoundTrip.m diff --git a/matlab/test/arrow/io/feather/tRoundTrip.m b/matlab/test/arrow/io/feather/tRoundTrip.m new file mode 100644 index 00000000000..4ef8a02a5ac --- /dev/null +++ b/matlab/test/arrow/io/feather/tRoundTrip.m @@ -0,0 +1,53 @@ +%TROUNDTRIP Round trip tests for feather. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. 
See the License for the specific language governing +% permissions and limitations under the License. +classdef tRoundTrip < matlab.unittest.TestCase + + methods(TestClassSetup) + % Delet once arrow.internal.io.feather.Reader is submitted. + function addFeatherFunctionsToMATLABPath(testCase) + import matlab.unittest.fixtures.PathFixture + % Add Feather test utilities to the MATLAB path. + testCase.applyFixture(PathFixture('../../../util')); + % arrow.cpp.call must be on the MATLAB path. + testCase.assertTrue(~isempty(which('arrow.cpp.call')), ... + '''arrow.cpp.call'' must be on the MATLAB path. Use ''addpath'' to add folders to the MATLAB path.'); + end + end + + methods(Test) + function Basic(testCase) + import matlab.unittest.fixtures.TemporaryFolderFixture + + fixture = testCase.applyFixture(TemporaryFolderFixture); + filename = fullfile(fixture.Folder, "temp.feather"); + + DoubleVar = [10; 20; 30; 40]; + SingleVar = single([10; 15; 20; 25]); + tWrite = table(DoubleVar, SingleVar); + + featherwrite(tWrite, filename); + tRead = featherread(filename); + testCase.verifyEqual(tWrite, tRead); + end + end +end + +function featherwrite(T, filename) + disp("hi"); + writer = arrow.internal.io.feather.Writer(filename); + writer.write(T); +end \ No newline at end of file From 8ed68c2e4f47a5fc7dc9cc40dbc86fa1e8f25852 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Mon, 7 Aug 2023 12:00:45 -0400 Subject: [PATCH 15/17] Remove disp statement in test --- matlab/test/arrow/io/feather/tRoundTrip.m | 1 - 1 file changed, 1 deletion(-) diff --git a/matlab/test/arrow/io/feather/tRoundTrip.m b/matlab/test/arrow/io/feather/tRoundTrip.m index 4ef8a02a5ac..ed31460f7de 100644 --- a/matlab/test/arrow/io/feather/tRoundTrip.m +++ b/matlab/test/arrow/io/feather/tRoundTrip.m @@ -47,7 +47,6 @@ function Basic(testCase) end function featherwrite(T, filename) - disp("hi"); writer = arrow.internal.io.feather.Writer(filename); writer.write(T); end \ No newline at end of file From 
3554a922e512c196a3036658af7d975bf46d478a Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Mon, 7 Aug 2023 14:52:29 -0400 Subject: [PATCH 16/17] Address code feedback --- .../cpp/arrow/matlab/io/feather/proxy/feather_writer.cc | 7 +++---- .../src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h | 4 ++-- matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m | 5 ++--- matlab/tools/cmake/BuildMatlabArrowInterface.cmake | 3 +-- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc index c2650f8c6f8..a27e1fb0e62 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc @@ -32,7 +32,7 @@ namespace arrow::matlab::io::feather::proxy { FeatherWriter::FeatherWriter(const std::string& filename) : filename{filename} { REGISTER_METHOD(FeatherWriter, getFilename); - REGISTER_METHOD(FeatherWriter, writeRecordBatch); + REGISTER_METHOD(FeatherWriter, write); } libmexclass::proxy::MakeResult FeatherWriter::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { @@ -59,7 +59,7 @@ namespace arrow::matlab::io::feather::proxy { context.outputs[0] = str_mda; } - void FeatherWriter::writeRecordBatch(libmexclass::proxy::method::Context& context) { + void FeatherWriter::write(libmexclass::proxy::method::Context& context) { namespace mda = ::matlab::data; mda::StructArray opts = context.inputs[0]; const mda::TypedArray record_batch_proxy_id_mda = opts[0]["RecordBatchProxyID"]; @@ -69,7 +69,7 @@ namespace arrow::matlab::io::feather::proxy { auto record_batch_proxy = std::static_pointer_cast(proxy); auto record_batch = record_batch_proxy->unwrap(); - MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto table, + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto table, arrow::Table::FromRecordBatches({record_batch}), context, error::TABLE_FROM_RECORD_BATCH); @@ 
-83,7 +83,6 @@ namespace arrow::matlab::io::feather::proxy { arrow::ipc::feather::WriteProperties write_props; write_props.version = arrow::ipc::feather::kFeatherV1Version; - // Write the Feather file metadata to the end of the file. MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(ipc::feather::WriteTable(*table, output_stream.get(), write_props), context, error::FEATHER_FAILED_TO_WRITE_TABLE); diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h index ae1223ebc2b..dadb4798878 100644 --- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h +++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h @@ -33,9 +33,9 @@ namespace arrow::matlab::io::feather::proxy { protected: void getFilename(libmexclass::proxy::method::Context& context); - void writeRecordBatch(libmexclass::proxy::method::Context& context); + void write(libmexclass::proxy::method::Context& context); private: - std::string filename; + const std::string filename; }; } diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m index 2710e9e3d91..470c41fd5b2 100644 --- a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m +++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m @@ -32,14 +32,13 @@ args = struct(Filename=filename); proxyName = "arrow.io.feather.proxy.FeatherWriter"; - obj.Proxy = libmexclass.proxy.Proxy(Name=proxyName, ... 
- ConstructorArguments={args}); + obj.Proxy = arrow.internal.proxy.create(proxyName, args); end function write(obj, T) rb = arrow.recordbatch(T); args = struct(RecordBatchProxyID=rb.Proxy.ID); - obj.Proxy.writeRecordBatch(args); + obj.Proxy.write(args); end function filename = get.Filename(obj) diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index ec19bf78f9a..1d579994176 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -39,8 +39,7 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/c "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/error" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy") + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/boolean_array.cc" From 241c0d7520b9fd59903f6fac125540f9979aa03a Mon Sep 17 00:00:00 2001 From: Sarah Gilmore Date: Mon, 7 Aug 2023 14:53:19 -0400 Subject: [PATCH 17/17] Fix typo --- matlab/test/arrow/io/feather/tRoundTrip.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matlab/test/arrow/io/feather/tRoundTrip.m b/matlab/test/arrow/io/feather/tRoundTrip.m index ed31460f7de..d56152be6d1 100644 --- a/matlab/test/arrow/io/feather/tRoundTrip.m +++ b/matlab/test/arrow/io/feather/tRoundTrip.m @@ -17,7 +17,7 @@ classdef tRoundTrip < matlab.unittest.TestCase methods(TestClassSetup) - % Delet once arrow.internal.io.feather.Reader is submitted. + % Delete once arrow.internal.io.feather.Reader is submitted. 
function addFeatherFunctionsToMATLABPath(testCase) import matlab.unittest.fixtures.PathFixture % Add Feather test utilities to the MATLAB path.