diff --git a/matlab/src/cpp/arrow/matlab/error/error.h b/matlab/src/cpp/arrow/matlab/error/error.h
index b7c0d7d696d..e1d2982f282 100644
--- a/matlab/src/cpp/arrow/matlab/error/error.h
+++ b/matlab/src/cpp/arrow/matlab/error/error.h
@@ -180,4 +180,8 @@ namespace arrow::matlab::error {
     static const char* UNKNOWN_PROXY_FOR_ARRAY_TYPE = "arrow:array:UnknownProxyForArrayType";
     static const char* RECORD_BATCH_NUMERIC_INDEX_WITH_EMPTY_RECORD_BATCH = "arrow:tabular:recordbatch:NumericIndexWithEmptyRecordBatch";
     static const char* RECORD_BATCH_INVALID_NUMERIC_COLUMN_INDEX = "arrow:tabular:recordbatch:InvalidNumericColumnIndex";
+    static const char* FAILED_TO_OPEN_FILE_FOR_WRITE = "arrow:io:FailedToOpenFileForWrite";
+    static const char* FEATHER_FAILED_TO_WRITE_TABLE = "arrow:io:feather:FailedToWriteTable";
+    static const char* TABLE_FROM_RECORD_BATCH = "arrow:table:FromRecordBatch";
+
 }
diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc
new file mode 100644
index 00000000000..a27e1fb0e62
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/matlab/io/feather/proxy/feather_writer.h"
+#include "arrow/matlab/tabular/proxy/record_batch.h"
+#include "arrow/matlab/error/error.h"
+
+#include "arrow/result.h"
+#include "arrow/table.h"
+#include "arrow/util/utf8.h"
+
+#include "arrow/io/file.h"
+#include "arrow/ipc/feather.h"
+
+#include "libmexclass/proxy/ProxyManager.h"
+
+namespace arrow::matlab::io::feather::proxy {
+
+    // Stores the UTF-8 file name and registers the getFilename and write methods.
+    FeatherWriter::FeatherWriter(const std::string& filename) : filename{filename} {
+        REGISTER_METHOD(FeatherWriter, getFilename);
+        REGISTER_METHOD(FeatherWriter, write);
+    }
+
+    // Builds a FeatherWriter from the "Filename" field of the options struct in
+    // constructor_arguments[0], converting the UTF-16 string to UTF-8.
+    libmexclass::proxy::MakeResult FeatherWriter::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
+        namespace mda = ::matlab::data;
+        mda::StructArray opts = constructor_arguments[0];
+        const mda::StringArray filename_mda = opts[0]["Filename"];
+
+        const auto filename_utf16 = std::u16string(filename_mda[0]);
+        MATLAB_ASSIGN_OR_ERROR(const auto filename_utf8,
+                               arrow::util::UTF16StringToUTF8(filename_utf16),
+                               error::UNICODE_CONVERSION_ERROR_ID);
+
+        return std::make_shared<FeatherWriter>(filename_utf8);
+    }
+
+    // Places the stored file name, converted to UTF-16, in context.outputs[0].
+    void FeatherWriter::getFilename(libmexclass::proxy::method::Context& context) {
+        namespace mda = ::matlab::data;
+        MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto utf16_filename,
+                                            arrow::util::UTF8StringToUTF16(filename),
+                                            context,
+                                            error::UNICODE_CONVERSION_ERROR_ID);
+        mda::ArrayFactory factory;
+        auto str_mda = factory.createScalar(utf16_filename);
+        context.outputs[0] = str_mda;
+    }
+
+    // Writes the RecordBatch whose proxy ID is given by the "RecordBatchProxyID"
+    // field of context.inputs[0] to `filename` as a Feather V1 file. Every
+    // fallible step hands `context` and an error ID to the MATLAB_* error macros.
+    void FeatherWriter::write(libmexclass::proxy::method::Context& context) {
+        namespace mda = ::matlab::data;
+        mda::StructArray opts = context.inputs[0];
+        const mda::TypedArray<uint64_t> record_batch_proxy_id_mda = opts[0]["RecordBatchProxyID"];
+        const uint64_t record_batch_proxy_id = record_batch_proxy_id_mda[0];
+
+        auto proxy = libmexclass::proxy::ProxyManager::getProxy(record_batch_proxy_id);
+        auto record_batch_proxy = std::static_pointer_cast<arrow::matlab::tabular::proxy::RecordBatch>(proxy);
+        auto record_batch = record_batch_proxy->unwrap();
+
+        MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto table,
+                                            arrow::Table::FromRecordBatches({record_batch}),
+                                            context,
+                                            error::TABLE_FROM_RECORD_BATCH);
+
+        MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(std::shared_ptr<arrow::io::OutputStream> output_stream,
+                                            arrow::io::FileOutputStream::Open(filename),
+                                            context,
+                                            error::FAILED_TO_OPEN_FILE_FOR_WRITE);
+
+        // Specify the feather file format version as V1
+        arrow::ipc::feather::WriteProperties write_props;
+        write_props.version = arrow::ipc::feather::kFeatherV1Version;
+
+        MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(ipc::feather::WriteTable(*table, output_stream.get(), write_props),
+                                            context,
+                                            error::FEATHER_FAILED_TO_WRITE_TABLE);
+
+        // Close explicitly so a failure while flushing/closing the file is
+        // reported through `context`. A failure during the implicit close at
+        // destruction could not be surfaced to the caller.
+        MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(output_stream->Close(),
+                                            context,
+                                            error::FEATHER_FAILED_TO_WRITE_TABLE);
+    }
+}
diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h
new file mode 100644
index 00000000000..dadb4798878
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/status.h"
+
+#include "libmexclass/proxy/Proxy.h"
+
+namespace arrow::matlab::io::feather::proxy {
+    // Proxy that holds a file name and exposes the getFilename and write methods.
+    class FeatherWriter : public libmexclass::proxy::Proxy {
+        public:
+            explicit FeatherWriter(const std::string& filename);  // explicit: no implicit std::string -> FeatherWriter conversion
+
+            ~FeatherWriter() {}
+
+            static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments);  // factory taking the proxy constructor arguments
+
+        protected:
+            void getFilename(libmexclass::proxy::method::Context& context);
+            void write(libmexclass::proxy::method::Context& context);
+
+        private:
+            const std::string filename;  // set once by the constructor
+    };
+}
diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
index 7d18c6c6b62..7a2a4f3192f 100644
--- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc
+++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
@@ -25,6 +25,7 @@
 #include "arrow/matlab/type/proxy/string_type.h"
 #include "arrow/matlab/type/proxy/timestamp_type.h"
 #include "arrow/matlab/type/proxy/field.h"
+#include "arrow/matlab/io/feather/proxy/feather_writer.h"
 
 #include "factory.h"
 
@@ -60,6 +61,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name,
     REGISTER_PROXY(arrow.type.proxy.BooleanType , arrow::matlab::type::proxy::PrimitiveCType<bool>);
     REGISTER_PROXY(arrow.type.proxy.StringType , arrow::matlab::type::proxy::StringType);
     REGISTER_PROXY(arrow.type.proxy.TimestampType , arrow::matlab::type::proxy::TimestampType);
+    REGISTER_PROXY(arrow.io.feather.proxy.FeatherWriter , arrow::matlab::io::feather::proxy::FeatherWriter);
 
     return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name};
 };
diff --git a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc
index ed30472f6c4..e159e926ec5 100644
--- a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc
+++ b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc
@@ -56,6 +56,11 @@ namespace arrow::matlab::tabular::proxy {
         REGISTER_METHOD(RecordBatch, getColumnByIndex);
     }
 
+    // Returns the wrapped arrow::RecordBatch so other C++ proxies can use it.
+    std::shared_ptr<arrow::RecordBatch> RecordBatch::unwrap() {
+        return record_batch;
+    }
+
     void RecordBatch::toString(libmexclass::proxy::method::Context& context) {
         namespace mda = ::matlab::data;
         MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto utf16_string, arrow::util::UTF8StringToUTF16(record_batch->ToString()), context, error::UNICODE_CONVERSION_ERROR_ID);
diff --git a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h
index b5d741060a1..b8c038816b3 100644
--- a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h
+++ b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h
@@ -29,6 +29,9 @@ namespace arrow::matlab::tabular::proxy {
 
         virtual ~RecordBatch() {}
 
+        // Returns the wrapped arrow::RecordBatch so other C++ proxies can use it.
+        std::shared_ptr<arrow::RecordBatch> unwrap();
+
         static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments);
 
     protected:
diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m
new file mode 100644
index 00000000000..470c41fd5b2
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m
@@ -0,0 +1,48 @@
+%WRITER Class for writing feather V1 files.
+
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. 
See the License for the specific language governing
+% permissions and limitations under the License.
+classdef Writer < matlab.mixin.Scalar % wraps the arrow.io.feather.proxy.FeatherWriter proxy
+
+    properties(Hidden, SetAccess=private, GetAccess=public)
+        Proxy % proxy object returned by arrow.internal.proxy.create
+    end
+
+    properties(Dependent)
+        Filename % fetched from the proxy on each access (see get.Filename)
+    end
+
+    methods
+        function obj = Writer(filename) % creates the proxy for FILENAME
+            arguments
+                filename(1, 1) {mustBeNonmissing, mustBeNonzeroLengthText} % scalar, nonmissing, nonzero-length text
+            end
+
+            args = struct(Filename=filename);
+            proxyName = "arrow.io.feather.proxy.FeatherWriter";
+            obj.Proxy = arrow.internal.proxy.create(proxyName, args);
+        end
+
+        function write(obj, T) % writes T through the proxy's write method
+            rb = arrow.recordbatch(T); % convert T with arrow.recordbatch
+            args = struct(RecordBatchProxyID=rb.Proxy.ID); % only the RecordBatch proxy's ID is passed to the proxy
+            obj.Proxy.write(args);
+        end
+
+        function filename = get.Filename(obj) % getter for the dependent Filename property
+            filename = obj.Proxy.getFilename();
+        end
+    end
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
index 0d002797f01..be5eee7d89c 100644
--- a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
+++ b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
@@ -23,7 +23,7 @@
         ColumnNames
     end
 
-    properties (Access=protected)
+    properties (Hidden, SetAccess=private, GetAccess=public) % public read access: arrow.internal.io.feather.Writer reads Proxy.ID
         Proxy
     end
 
diff --git a/matlab/test/arrow/io/feather/tRoundTrip.m b/matlab/test/arrow/io/feather/tRoundTrip.m
new file mode 100644
index 00000000000..d56152be6d1
--- /dev/null
+++ b/matlab/test/arrow/io/feather/tRoundTrip.m
@@ -0,0 +1,52 @@
+%TROUNDTRIP Round trip tests for feather.
+
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. 
You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+classdef tRoundTrip < matlab.unittest.TestCase
+
+    methods(TestClassSetup)
+        % Delete once arrow.internal.io.feather.Reader is submitted.
+        function addFeatherFunctionsToMATLABPath(testCase)
+            import matlab.unittest.fixtures.PathFixture
+            % Add Feather test utilities to the MATLAB path.
+            testCase.applyFixture(PathFixture('../../../util'));
+            % arrow.cpp.call must be on the MATLAB path.
+            testCase.assertTrue(~isempty(which('arrow.cpp.call')), ...
+                '''arrow.cpp.call'' must be on the MATLAB path. Use ''addpath'' to add folders to the MATLAB path.');
+        end
+    end
+
+    methods(Test)
+        function Basic(testCase) % write a table with the new Writer, read it back with featherread
+            import matlab.unittest.fixtures.TemporaryFolderFixture
+
+            fixture = testCase.applyFixture(TemporaryFolderFixture);
+            filename = fullfile(fixture.Folder, "temp.feather");
+
+            DoubleVar = [10; 20; 30; 40];
+            SingleVar = single([10; 15; 20; 25]);
+            tWrite = table(DoubleVar, SingleVar);
+
+            featherwrite(tWrite, filename);
+            tRead = featherread(filename);
+            testCase.verifyEqual(tRead, tWrite); % verifyEqual(actual, expected): the table read back is the actual value
+        end
+    end
+end
+
+function featherwrite(T, filename) % local helper: writes T to FILENAME with arrow.internal.io.feather.Writer
+    writer = arrow.internal.io.feather.Writer(filename);
+    writer.write(T);
+end
\ No newline at end of file
diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
index f4696cfad26..1d579994176 100644
--- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
+++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
@@ -55,7 +55,9 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/string_type.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/field.cc" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/wrap.cc") + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/wrap.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc") + set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy")