From d7d64075fb5df00d25925190088d5ac58b4d6883 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 23 Mar 2017 17:27:23 -0400 Subject: [PATCH 1/3] Draft Tensor flatbuffer type Change-Id: I29a980e132d31711a49ddd3a68824dfeca262d50 --- cpp/src/arrow/ipc/CMakeLists.txt | 1 + format/Message.fbs | 3 +- format/Tensor.fbs | 52 ++++++++++++++++++++++++++++++++ java/format/pom.xml | 3 +- 4 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 format/Tensor.fbs diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 056e7dba538..d6ee9309b44 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ b/cpp/src/arrow/ipc/CMakeLists.txt @@ -114,6 +114,7 @@ set(FBS_SRC ${CMAKE_SOURCE_DIR}/../format/Message.fbs ${CMAKE_SOURCE_DIR}/../format/File.fbs ${CMAKE_SOURCE_DIR}/../format/Schema.fbs + ${CMAKE_SOURCE_DIR}/../format/Tensor.fbs ${CMAKE_CURRENT_SOURCE_DIR}/feather.fbs) foreach(FIL ${FBS_SRC}) diff --git a/format/Message.fbs b/format/Message.fbs index 2cb60953c6a..f4a95713cea 100644 --- a/format/Message.fbs +++ b/format/Message.fbs @@ -16,6 +16,7 @@ // under the License. include "Schema.fbs"; +include "Tensor.fbs"; namespace org.apache.arrow.flatbuf; @@ -82,7 +83,7 @@ table DictionaryBatch { /// which may include experimental metadata types. For maximum compatibility, /// it is best to send data using RecordBatch union MessageHeader { - Schema, DictionaryBatch, RecordBatch + Schema, DictionaryBatch, RecordBatch, Tensor } table Message { diff --git a/format/Tensor.fbs b/format/Tensor.fbs new file mode 100644 index 00000000000..7d07552c613 --- /dev/null +++ b/format/Tensor.fbs @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// EXPERIMENTAL: Metadata for n-dimensional arrays, aka "tensors" or +/// "ndarrays". Arrow implementations in general are not required to implement +/// this type + +include "Schema.fbs"; + +namespace org.apache.arrow.flatbuf; + +/// Shape data for a single axis in a tensor +table TensorDim { + /// Length of dimension + size: long; + + /// Name of the dimension, optional + name: string; +} + +table Tensor { + /// The type of data contained in a value cell. Currently only fixed-width + /// value types are supported, no strings or nested types + type: Type; + + /// The dimensions of the tensor, optionally named + shape: [TensorDim]; + + /// The size of a memory increment necessary to advance 1 cell along a given + /// axis. If the strides member is null or has 0 length, then the strides + /// will be computed from the shape according to row major order + strides: [long]; + + /// The location and size of the tensor's data + data: Buffer; +} + +root_type Tensor; diff --git a/java/format/pom.xml b/java/format/pom.xml index e7a58a4172f..98a113a30cf 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -110,8 +110,9 @@ -o ${flatc.generated.files} ../../format/Schema.fbs - ../../format/Message.fbs + ../../format/Tensor.fbs ../../format/File.fbs + ../../format/Message.fbs From 249a9d58540836415670ed35fd0c4208714bd09a Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 24 Mar 2017 11:09:50 -0400 Subject: [PATCH 2/3] Replace strides with TensorOrder enum for row major / column major Change-Id: I2ffa55305e770d9aa5c1ecdea74ee65b91589a8a --- format/Tensor.fbs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/format/Tensor.fbs b/format/Tensor.fbs index 7d07552c613..23f0179dfec 100644 --- a/format/Tensor.fbs +++ b/format/Tensor.fbs @@ -32,6 +32,16 @@ table TensorDim { name: string; } +enum TensorOrder : short { + /// Higher dimensions vary first when traversing data in byte-contiguous + /// order, aka "C order" + ROW_MAJOR, + + /// Lower dimensions vary first when traversing data in byte-contiguous + /// order, aka "Fortran order" + COLUMN_MAJOR +} + table Tensor { /// The type of data contained in a value cell. Currently only fixed-width /// value types are supported, no strings or nested types @@ -40,10 +50,8 @@ table Tensor { /// The dimensions of the tensor, optionally named shape: [TensorDim]; - /// The size of a memory increment necessary to advance 1 cell along a given - /// axis. If the strides member is null or has 0 length, then the strides - /// will be computed from the shape according to row major order - strides: [long]; + /// The memory order of the tensor's data + order: TensorOrder; /// The location and size of the tensor's data data: Buffer; From afac56e8ba9057a898d9fcc655b22b8beca55042 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 24 Mar 2017 11:28:02 -0400 Subject: [PATCH 3/3] Change TensorOrder enum to byte Change-Id: I129312f80ea6f561ee5a56b2393bda2ddcddefca --- format/Tensor.fbs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/format/Tensor.fbs b/format/Tensor.fbs index 23f0179dfec..bc5b6d1289b 100644 --- a/format/Tensor.fbs +++ b/format/Tensor.fbs @@ -32,7 +32,7 @@ table TensorDim { name: string; } -enum TensorOrder : short { +enum TensorOrder : byte { /// Higher dimensions vary first when traversing data in byte-contiguous /// order, aka "C order" ROW_MAJOR,