Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,8 @@ namespace arrow::matlab::io::feather::proxy {
std::shared_ptr<arrow::Table> table = nullptr;
MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(reader->Read(&table), context, error::FEATHER_FAILED_TO_READ_TABLE);

// Get the first RecordBatch from the Table.
arrow::TableBatchReader table_batch_reader{table};
std::shared_ptr<arrow::RecordBatch> record_batch = nullptr;
MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(table_batch_reader.ReadNext(&record_batch), context, error::FEATHER_FAILED_TO_READ_RECORD_BATCH);
// Combine all the chunks of the Table into a single RecordBatch.
MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto record_batch, table->CombineChunksToBatch(arrow::default_memory_pool()), context, error::FEATHER_FAILED_TO_READ_RECORD_BATCH);

// Create a Proxy from the first RecordBatch.
auto record_batch_proxy = std::make_shared<RecordBatchProxy>(record_batch);
Expand Down
85 changes: 33 additions & 52 deletions matlab/src/matlab/featherread.m
Original file line number Diff line number Diff line change
Expand Up @@ -23,64 +23,45 @@
% specific language governing permissions and limitations
% under the License.

import arrow.util.*;
arguments
filename(1, 1) string {mustBeNonmissing, mustBeNonzeroLengthText}
end

% Validate input arguments.
narginchk(1, 1);
filename = convertStringsToChars(filename);
if ~ischar(filename)
error('MATLAB:arrow:InvalidFilenameDatatype', ...
'Filename must be a character vector or string scalar.');
end
typesToCast = [arrow.type.ID.UInt8, ...
arrow.type.ID.UInt16, ...
arrow.type.ID.UInt32, ...
arrow.type.ID.UInt64, ...
arrow.type.ID.Int8, ...
arrow.type.ID.Int16, ...
arrow.type.ID.Int32, ...
arrow.type.ID.Int64, ...
arrow.type.ID.Boolean];

% FOPEN can be used to search for files without an extension on the MATLAB
% path.
fid = fopen(filename);
if fid ~= -1
filename = fopen(fid);
fclose(fid);
else
error('MATLAB:arrow:UnableToOpenFile', ...
'Unable to open file %s.', filename);
end
reader = arrow.internal.io.feather.Reader(filename);
recordBatch = reader.read();

% Read table variables and metadata from the given Feather file using
% libarrow.
[variables, metadata] = arrow.cpp.call('featherread', filename);
% Convert RecordBatch to a MATLAB table.
t = table(recordBatch);

% Make valid MATLAB table variable names out of any of the
% Feather table column names that are not valid MATLAB table
% variable names.
[variableNames, variableDescriptions] = makeValidMATLABTableVariableNames({variables.Name});

% Iterate over each table variable, handling invalid (null) entries
% and invalid MATLAB table variable names appropriately.
% Note: All Arrow arrays can have an associated validity (null) bitmap.
% The Apache Arrow specification defines 0 (false) to represent an
% invalid (null) array entry and 1 (true) to represent a valid
% (non-null) array entry.
for ii = 1:length(variables)
if ~all(variables(ii).Valid)
switch variables(ii).Type
case {'uint8', 'uint16', 'uint32', 'uint64', 'int8', 'int16', 'int32', 'int64'}
% MATLAB does not support missing values for integer types, so
% cast to double and set missing values to NaN in this case.
variables(ii).Data = double(variables(ii).Data);
% Cast integer and boolean columns containing null values
% to double. Substitute null values with NaN.
for ii = 1:recordBatch.NumColumns
array = recordBatch.column(ii);
type = array.Type.ID;
if any(type == typesToCast) && any(~array.Valid)
% Cast to double.
t.(ii) = double(t.(ii));
% Substitute null values with NaN.
t{~array.Valid, ii} = NaN;
end

% Set invalid (null) entries to the appropriate MATLAB missing value using
% logical indexing.
variables(ii).Data(~variables(ii).Valid) = missing;
end
end

% Construct a MATLAB table from the Feather file data.
t = table(variables.Data, 'VariableNames', cellstr(variableNames));

% Store original Feather table column names in the table.Properties.VariableDescriptions
% property if they were modified to be valid MATLAB table variable names.
if ~isempty(variableDescriptions)
t.Properties.VariableDescriptions = cellstr(variableDescriptions);
end
% Store original Feather table column names in the table.Properties.VariableDescriptions
% property if they were modified to be valid MATLAB table variable names.
modifiedColumnNameIndices = t.Properties.VariableNames ~= recordBatch.ColumnNames;
if any(modifiedColumnNameIndices)
originalColumnNames = recordBatch.ColumnNames(modifiedColumnNameIndices);
t.Properties.VariableDescriptions(modifiedColumnNameIndices) = compose("Original variable name: '%s'", originalColumnNames);
end

end
45 changes: 39 additions & 6 deletions matlab/test/tfeather.m
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ function zeroByNTable(testCase)
function ErrorIfUnableToOpenFile(testCase)
filename = fullfile(pwd, 'temp.feather');

testCase.verifyError(@() featherread(filename), 'MATLAB:arrow:UnableToOpenFile');
testCase.verifyError(@() featherread(filename), 'arrow:io:FailedToOpenFileForRead');
end

function ErrorIfCorruptedFeatherFile(testCase)
Expand All @@ -156,16 +156,13 @@ function ErrorIfCorruptedFeatherFile(testCase)
fwrite(fileID, [1; 5]);
fclose(fileID);

testCase.verifyError(@() featherread(filename), 'MATLAB:arrow:status:Invalid');
testCase.verifyError(@() featherread(filename), 'arrow:io:feather:FailedToCreateReader');
end

function ErrorIfInvalidFilenameDatatype(testCase)
filename = fullfile(pwd, 'temp.feather');

t = createTable;

testCase.verifyError(@() featherwrite({table}, t), 'MATLAB:validation:UnableToConvert');
testCase.verifyError(@() featherread({filename}), 'MATLAB:arrow:InvalidFilenameDatatype');
end

function ErrorIfTooManyInputs(testCase)
Expand All @@ -179,7 +176,7 @@ function ErrorIfTooManyInputs(testCase)

function ErrorIfTooFewInputs(testCase)
testCase.verifyError(@() featherwrite(), 'MATLAB:minrhs');
testCase.verifyError(@() featherread(), 'MATLAB:narginchk:notEnoughInputs');
testCase.verifyError(@() featherread(), 'MATLAB:minrhs');
end

function ErrorIfMultiColVarExist(testCase)
Expand Down Expand Up @@ -218,5 +215,41 @@ function NumericComplexUnsupported(testCase)

testCase.verifyError(@() featherwrite(filename, actualTable) ,'arrow:array:ComplexNumeric');
end

function SupportedTypes(testCase)
filename = fullfile(pwd, 'temp.feather');

% Create a table with all supported MATLAB types.
expectedTable = table(int8 ([1, 2, 3]'), ...
int16 ([1, 2, 3]'), ...
int32 ([1, 2, 3]'), ...
int64 ([1, 2, 3]'), ...
uint8 ([1, 2, 3]'), ...
uint16 ([1, 2, 3]'), ...
uint32 ([1, 2, 3]'), ...
uint64 ([1, 2, 3]'), ...
logical([1, 0, 1]'), ...
single ([1, 2, 3]'), ...
double ([1, 2, 3]'), ...
string (["A", "B", "C"]'), ...
datetime(2023, 6, 28) + days(0:2)');

actualTable = featherRoundTrip(filename, expectedTable);
testCase.verifyEqual(actualTable, expectedTable);
end

function UnicodeVariableNames(testCase)
filename = fullfile(pwd, 'temp.feather');

smiley = "😀";
tree = "🌲";
mango = "🥭";
columnNames = [smiley, tree, mango];
expectedTable = table(1, 2, 3, VariableNames=columnNames);

actualTable = featherRoundTrip(filename, expectedTable);
testCase.verifyEqual(actualTable, expectedTable);
end

end
end
6 changes: 3 additions & 3 deletions matlab/test/tfeathermex.m
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,17 @@ function InvalidMATLABTableVariableNames(testCase)
filename = fullfile(pwd, 'temp.feather');

% Create a table with an invalid MATLAB table variable name.
invalidVariable = arrow.util.createVariableStruct('double', 1, true, '@');
invalidVariable = arrow.util.createVariableStruct('double', 1, true, ':');
validVariable = arrow.util.createVariableStruct('double', 1, true, 'Valid');
variables = [invalidVariable, validVariable];
metadata = arrow.util.createMetadataStruct(1, 2);
arrow.cpp.call('featherwrite', filename, variables, metadata);
t = featherread(filename);

testCase.verifyEqual(t.Properties.VariableNames{1}, 'x_');
testCase.verifyEqual(t.Properties.VariableNames{1}, ':_1');
testCase.verifyEqual(t.Properties.VariableNames{2}, 'Valid');

testCase.verifyEqual(t.Properties.VariableDescriptions{1}, 'Original variable name: ''@''');
testCase.verifyEqual(t.Properties.VariableDescriptions{1}, 'Original variable name: '':''');
testCase.verifyEqual(t.Properties.VariableDescriptions{2}, '');
end
end
Expand Down