Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/ipc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ set(FBS_SRC
${ARROW_SOURCE_DIR}/../format/File.fbs
${ARROW_SOURCE_DIR}/../format/Schema.fbs
${ARROW_SOURCE_DIR}/../format/Tensor.fbs
${ARROW_SOURCE_DIR}/../format/SparseTensor.fbs
${CMAKE_CURRENT_SOURCE_DIR}/feather.fbs)

foreach(FIL ${FBS_SRC})
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/ipc/metadata-internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
#include "arrow/io/interfaces.h"
#include "arrow/ipc/File_generated.h" // IWYU pragma: keep
#include "arrow/ipc/Message_generated.h"
#include "arrow/ipc/Tensor_generated.h" // IWYU pragma: keep
#include "arrow/ipc/SparseTensor_generated.h" // IWYU pragma: keep
#include "arrow/ipc/Tensor_generated.h" // IWYU pragma: keep
#include "arrow/ipc/message.h"
#include "arrow/ipc/util.h"
#include "arrow/sparse_tensor.h"
Expand Down
2 changes: 1 addition & 1 deletion docs/source/format/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Currently, the Arrow specification consists of these pieces:
- Logical Types, Schemas, and Record Batch Metadata (see Schema.fbs)
- Encapsulated Messages (see Message.fbs)
- Mechanics of messaging between Arrow systems (IPC, RPC, etc.) (see :doc:`IPC`)
- Tensor (Multi-dimensional array) Metadata (see Tensor.fbs)
- Tensor (Multi-dimensional array) Metadata (see Tensor.fbs and SparseTensor.fbs)

The metadata currently uses Google's `flatbuffers library`_ for serializing a
couple related pieces of information:
Expand Down
1 change: 1 addition & 0 deletions format/Message.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

include "Schema.fbs";
include "SparseTensor.fbs";
include "Tensor.fbs";

namespace org.apache.arrow.flatbuf;
Expand Down
116 changes: 116 additions & 0 deletions format/SparseTensor.fbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

/// EXPERIMENTAL: Metadata for n-dimensional sparse arrays, aka "sparse tensors".
/// Arrow implementations in general are not required to implement this type.

include "Tensor.fbs";

namespace org.apache.arrow.flatbuf;

/// ----------------------------------------------------------------------
/// EXPERIMENTAL: Data structures for sparse tensors

/// Coordinate (COO) format of sparse tensor index.
table SparseTensorIndexCOO {
// NOTE(review): the description below says the index matrix is NxM
// (6x4 for the example), but the example matrix is written as MxN (4x6),
// with one column per non-zero value. Confirm which layout indicesBuffer
// actually uses and make the text and the example agree.
/// COO's index list is represented as an NxM matrix,
/// where N is the number of non-zero values,
/// and M is the number of dimensions of a sparse tensor.
/// indicesBuffer stores the location and size of this index matrix.
/// The type of index value is long, so the stride for the index matrix is unnecessary.
///
/// For example, let X be a 2x3x4x5 tensor, and it has the following 6 non-zero values:
///
///   X[0, 1, 2, 0] := 1
///   X[1, 1, 2, 3] := 2
///   X[0, 2, 1, 0] := 3
///   X[0, 1, 3, 0] := 4
///   X[0, 1, 2, 1] := 5
///   X[1, 2, 0, 4] := 6
///
/// In COO format, the index matrix of X is the following 4x6 matrix
/// (each column holds the coordinates of one non-zero value):
///
///   [[0, 0, 0, 0, 1, 1],
///    [1, 1, 1, 2, 1, 2],
///    [2, 2, 3, 1, 2, 0],
///    [0, 1, 0, 0, 3, 4]]
///
/// Note that the indices are sorted in lexicographical order.
indicesBuffer: Buffer;
}

/// Compressed Sparse Row (CSR) format, which is matrix-specific.
table SparseMatrixIndexCSR {
/// indptrBuffer stores the location and size of the indptr array that
/// represents the range of the rows.
/// The i-th row spans from indptr[i] to indptr[i+1] in the data.
/// The length of this array is 1 + (the number of rows), and the type
/// of index value is long.
///
/// For example, let X be the following 6x4 matrix:
///
///   X := [[0, 1, 2, 0],
///         [0, 0, 3, 0],
///         [0, 4, 0, 5],
///         [0, 0, 0, 0],
///         [6, 0, 7, 8],
///         [0, 9, 0, 0]].
///
/// The array of non-zero values in X is:
///
///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
///
/// And the indptr of X is (the last entry equals the number of
/// non-zero values, 9; the repeated 5 marks the empty fourth row):
///
///   indptr(X) = [0, 2, 3, 5, 5, 8, 9].
indptrBuffer: Buffer;

/// indicesBuffer stores the location and size of the array that
/// contains the column indices of the corresponding non-zero values.
/// The type of index value is long.
///
/// For example, the indices of the above X are:
///
///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
indicesBuffer: Buffer;
}

/// The index of a sparse tensor: either a COO index (SparseTensorIndexCOO)
/// or a matrix-specific CSR index (SparseMatrixIndexCSR).
// NOTE: FlatBuffers derives the union's type tag from member order, so new
// index formats should be appended at the end rather than inserted.
union SparseTensorIndex {
SparseTensorIndexCOO,
SparseMatrixIndexCSR
}

/// EXPERIMENTAL: Metadata for a sparse tensor: its value type, shape,
/// number of non-zero values, sparse index, and the location of its data.
table SparseTensor {
/// The type of data contained in a value cell.
/// Currently only fixed-width value types are supported,
/// no strings or nested types.
type: Type;

/// The dimensions of the tensor, optionally named.
shape: [TensorDim];

/// The number of non-zero values in a sparse tensor.
non_zero_length: long;

/// Sparse tensor index
sparseIndex: SparseTensorIndex;

/// The location and size of the tensor's data
data: Buffer;
}

// SparseTensor is declared as the root table of this schema file.
root_type SparseTensor;
93 changes: 0 additions & 93 deletions format/Tensor.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -51,96 +51,3 @@ table Tensor {
}

root_type Tensor;

/// ----------------------------------------------------------------------
/// EXPERIMENTAL: Data structures for sparse tensors

/// Coordinate (COO) format of sparse tensor index.
table SparseTensorIndexCOO {
// NOTE(review): the description below says the index matrix is NxM
// (6x4 for the example), but the example matrix is written as MxN (4x6),
// with one column per non-zero value. Confirm which layout indicesBuffer
// actually uses and make the text and the example agree.
/// COO's index list is represented as an NxM matrix,
/// where N is the number of non-zero values,
/// and M is the number of dimensions of a sparse tensor.
/// indicesBuffer stores the location and size of this index matrix.
/// The type of index value is long, so the stride for the index matrix is unnecessary.
///
/// For example, let X be a 2x3x4x5 tensor, and it has the following 6 non-zero values:
///
///   X[0, 1, 2, 0] := 1
///   X[1, 1, 2, 3] := 2
///   X[0, 2, 1, 0] := 3
///   X[0, 1, 3, 0] := 4
///   X[0, 1, 2, 1] := 5
///   X[1, 2, 0, 4] := 6
///
/// In COO format, the index matrix of X is the following 4x6 matrix
/// (each column holds the coordinates of one non-zero value):
///
///   [[0, 0, 0, 0, 1, 1],
///    [1, 1, 1, 2, 1, 2],
///    [2, 2, 3, 1, 2, 0],
///    [0, 1, 0, 0, 3, 4]]
///
/// Note that the indices are sorted in lexicographical order.
indicesBuffer: Buffer;
}

/// Compressed Sparse Row (CSR) format, which is matrix-specific.
table SparseMatrixIndexCSR {
/// indptrBuffer stores the location and size of the indptr array that
/// represents the range of the rows.
/// The i-th row spans from indptr[i] to indptr[i+1] in the data.
/// The length of this array is 1 + (the number of rows), and the type
/// of index value is long.
///
/// For example, let X be the following 6x4 matrix:
///
///   X := [[0, 1, 2, 0],
///         [0, 0, 3, 0],
///         [0, 4, 0, 5],
///         [0, 0, 0, 0],
///         [6, 0, 7, 8],
///         [0, 9, 0, 0]].
///
/// The array of non-zero values in X is:
///
///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
///
/// And the indptr of X is (the last entry equals the number of
/// non-zero values, 9; the repeated 5 marks the empty fourth row):
///
///   indptr(X) = [0, 2, 3, 5, 5, 8, 9].
indptrBuffer: Buffer;

/// indicesBuffer stores the location and size of the array that
/// contains the column indices of the corresponding non-zero values.
/// The type of index value is long.
///
/// For example, the indices of the above X are:
///
///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
indicesBuffer: Buffer;
}

/// The index of a sparse tensor: either a COO index (SparseTensorIndexCOO)
/// or a matrix-specific CSR index (SparseMatrixIndexCSR).
// NOTE: FlatBuffers derives the union's type tag from member order, so new
// index formats should be appended at the end rather than inserted.
union SparseTensorIndex {
SparseTensorIndexCOO,
SparseMatrixIndexCSR
}

/// EXPERIMENTAL: Metadata for a sparse tensor: its value type, shape,
/// number of non-zero values, sparse index, and the location of its data.
table SparseTensor {
/// The type of data contained in a value cell.
/// Currently only fixed-width value types are supported,
/// no strings or nested types.
type: Type;

/// The dimensions of the tensor, optionally named.
shape: [TensorDim];

/// The number of non-zero values in a sparse tensor.
non_zero_length: long;

/// Sparse tensor index
sparseIndex: SparseTensorIndex;

/// The location and size of the tensor's data
data: Buffer;
}

// SparseTensor is declared as the root table of this schema file.
root_type SparseTensor;
1 change: 1 addition & 0 deletions java/format/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
<argument>${flatc.generated.files}</argument>
<argument>../../format/Schema.fbs</argument>
<argument>../../format/Tensor.fbs</argument>
<argument>../../format/SparseTensor.fbs</argument>
<argument>../../format/File.fbs</argument>
<argument>../../format/Message.fbs</argument>
</arguments>
Expand Down