Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions cpp/src/arrow/tensor/converter_internal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "arrow/tensor/converter.h"

// Dispatch ACTION on a pair of runtime element sizes.
//
// Expands to a nested `switch`: the outer one on `index_elsize`, the inner one
// on `value_elsize`. For every supported (index, value) size pair it invokes
//
//   ACTION(<index C type>, <value C type>, __VA_ARGS__);
//
// Size-to-type mapping used below:
//   index_elsize: 1 -> uint8_t, 2 -> uint16_t, 4 -> uint32_t, 8 -> int64_t
//   value_elsize: 1 -> uint8_t, 2 -> uint16_t, 4 -> uint32_t, 8 -> uint64_t
// Note that the 8-byte index type is the signed int64_t, unlike the narrower
// unsigned index types.
//
// There is no `default:` label at either level, so any other element size
// falls through the switch without invoking ACTION.
//
// The expansion is a bare `switch` statement (it is not wrapped in
// `do { ... } while (0)`), so use it where a full statement is allowed,
// e.g. inside a braced block. No comments are placed inside the macro body
// because every line ends in a continuation backslash.
#define DISPATCH(ACTION, index_elsize, value_elsize, ...) \
switch (index_elsize) { \
case 1: \
switch (value_elsize) { \
case 1: \
ACTION(uint8_t, uint8_t, __VA_ARGS__); \
break; \
case 2: \
ACTION(uint8_t, uint16_t, __VA_ARGS__); \
break; \
case 4: \
ACTION(uint8_t, uint32_t, __VA_ARGS__); \
break; \
case 8: \
ACTION(uint8_t, uint64_t, __VA_ARGS__); \
break; \
} \
break; \
case 2: \
switch (value_elsize) { \
case 1: \
ACTION(uint16_t, uint8_t, __VA_ARGS__); \
break; \
case 2: \
ACTION(uint16_t, uint16_t, __VA_ARGS__); \
break; \
case 4: \
ACTION(uint16_t, uint32_t, __VA_ARGS__); \
break; \
case 8: \
ACTION(uint16_t, uint64_t, __VA_ARGS__); \
break; \
} \
break; \
case 4: \
switch (value_elsize) { \
case 1: \
ACTION(uint32_t, uint8_t, __VA_ARGS__); \
break; \
case 2: \
ACTION(uint32_t, uint16_t, __VA_ARGS__); \
break; \
case 4: \
ACTION(uint32_t, uint32_t, __VA_ARGS__); \
break; \
case 8: \
ACTION(uint32_t, uint64_t, __VA_ARGS__); \
break; \
} \
break; \
case 8: \
switch (value_elsize) { \
case 1: \
ACTION(int64_t, uint8_t, __VA_ARGS__); \
break; \
case 2: \
ACTION(int64_t, uint16_t, __VA_ARGS__); \
break; \
case 4: \
ACTION(int64_t, uint32_t, __VA_ARGS__); \
break; \
case 8: \
ACTION(int64_t, uint64_t, __VA_ARGS__); \
break; \
} \
break; \
}
140 changes: 118 additions & 22 deletions cpp/src/arrow/tensor/coo_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,19 @@
// specific language governing permissions and limitations
// under the License.

#include "arrow/tensor/converter.h"
#include "arrow/tensor/converter_internal.h"

#include <algorithm>
#include <cstdint>
#include <memory>
#include <numeric>
#include <vector>

#include "arrow/buffer.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"
#include "arrow/visitor_inline.h"

namespace arrow {
Expand All @@ -34,8 +37,9 @@ class MemoryPool;
namespace internal {
namespace {

inline void IncrementIndex(std::vector<int64_t>& coord,
const std::vector<int64_t>& shape) {
template <typename c_index_type>
inline void IncrementRowMajorIndex(std::vector<c_index_type>& coord,
const std::vector<int64_t>& shape) {
const int64_t ndim = shape.size();
++coord[ndim - 1];
if (coord[ndim - 1] == shape[ndim - 1]) {
Expand All @@ -48,6 +52,109 @@ inline void IncrementIndex(std::vector<int64_t>& coord,
}
}

// Gather the non-zero elements of a dense tensor by scanning its raw buffer
// linearly, treating it as a contiguous array of c_value_type.
//
// A coordinate counter is advanced with IncrementRowMajorIndex() once per
// element visited. Each time a non-zero element is found, its value is
// appended to `values` and the current ndim-long coordinate tuple is appended
// to `indices`.
//
// \param[in] tensor the dense tensor to scan
// \param[out] indices destination for the coordinate tuples (ndim entries
//             per non-zero element)
// \param[out] values destination for the non-zero values
// \param[in] size not read by this function; it is part of the common
//            signature shared by the Convert*Tensor helpers
template <typename c_index_type, typename c_value_type>
void ConvertRowMajorTensor(const Tensor& tensor, c_index_type* indices,
                           c_value_type* values, const int64_t size) {
  const auto& dims = tensor.shape();
  const auto rank = tensor.ndim();
  const auto* cursor = reinterpret_cast<const c_value_type*>(tensor.raw_data());

  // Coordinates of the element currently under `cursor`.
  std::vector<c_index_type> position(rank, 0);

  for (int64_t visited = 0, total = tensor.size(); visited < total;
       ++visited, ++cursor) {
    const c_value_type element = *cursor;
    if (ARROW_PREDICT_FALSE(element != static_cast<c_value_type>(0))) {
      *values++ = element;
      // std::copy returns the advanced output pointer, i.e. indices + rank.
      indices = std::copy(position.begin(), position.end(), indices);
    }

    IncrementRowMajorIndex(position, dims);
  }
}

// Gather the non-zero elements of a column-major dense tensor and emit them
// sorted by coordinate.
//
// Steps:
//   1. Collect values and coordinate tuples into temporary buffers with
//      ConvertRowMajorTensor().
//   2. Reverse every ndim-long coordinate tuple in place.
//   3. Compute the permutation that sorts the reversed tuples in ascending
//      lexicographic order.
//   4. Write values and tuples to the outputs following that permutation.
//
// \param[in] tensor the dense tensor to convert
// \param[out] out_indices receives `size` coordinate tuples of ndim entries
//             each, in ascending lexicographic order
// \param[out] out_values receives the `size` values, in the same order as
//             the tuples in `out_indices`
// \param[in] size number of entries to emit; the temporary buffers are sized
//            from it, so it must be at least the number of non-zero elements
//            that ConvertRowMajorTensor() will write
//
// NOTE(review): ConvertRowMajorTensor() advances its coordinate counter
// against tensor.shape() as-is. For a column-major tensor whose dimensions
// are not all equal, the reversed tuples may therefore not match the
// elements' logical coordinates -- confirm against IncrementRowMajorIndex()
// and the column-major test cases.
template <typename c_index_type, typename c_value_type>
void ConvertColumnMajorTensor(const Tensor& tensor, c_index_type* out_indices,
                              c_value_type* out_values, const int64_t size) {
  const auto ndim = tensor.ndim();
  std::vector<c_index_type> indices(ndim * size);
  std::vector<c_value_type> values(size);
  ConvertRowMajorTensor(tensor, indices.data(), values.data(), size);

  // transpose indices
  for (int64_t i = 0; i < size; ++i) {
    for (int j = 0; j < ndim / 2; ++j) {
      std::swap(indices[i * ndim + j], indices[i * ndim + ndim - j - 1]);
    }
  }

  // sort indices: `order` becomes the permutation that lists the entries of
  // the temporary buffers in ascending lexicographic coordinate order
  std::vector<int64_t> order(size);
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(), [&](const int64_t xi, const int64_t yi) {
    const int64_t x_offset = xi * ndim;
    const int64_t y_offset = yi * ndim;
    for (int j = 0; j < ndim; ++j) {
      const auto x = indices[x_offset + j];
      const auto y = indices[y_offset + j];
      if (x < y) return true;
      if (x > y) return false;
    }
    return false;
  });

  // transfer result: output slot i takes the entry at position order[i] of
  // the temporary buffers, so that the sort above actually takes effect
  const auto* indices_data = indices.data();
  for (int64_t i = 0; i < size; ++i) {
    const int64_t src = order[i];
    out_values[i] = values[src];

    std::copy_n(indices_data + src * ndim, ndim, out_indices);
    out_indices += ndim;
  }
}

// Gather the non-zero elements of a dense tensor by looking up every element
// through Tensor::Value() at its logical coordinates, rather than by walking
// the raw buffer.
//
// A coordinate counter is advanced with IncrementRowMajorIndex() once per
// element visited. Each non-zero element appends its value to `values` and
// its ndim coordinates (narrowed to c_index_type) to `indices`.
//
// \param[in] tensor the dense tensor to scan
// \param[out] indices destination for the coordinate tuples (ndim entries
//             per non-zero element)
// \param[out] values destination for the non-zero values
// \param[in] size not read by this function; it is part of the common
//            signature shared by the Convert*Tensor helpers
template <typename c_index_type, typename c_value_type>
void ConvertStridedTensor(const Tensor& tensor, c_index_type* indices,
                          c_value_type* values, const int64_t size) {
  using ValueType = typename CTypeTraits<c_value_type>::ArrowType;
  const auto rank = tensor.ndim();
  const auto& dims = tensor.shape();

  // Logical coordinates of the element being examined; kept as int64_t
  // because that is what Tensor::Value() is called with.
  std::vector<int64_t> position(rank, 0);

  for (int64_t visited = 0, total = tensor.size(); visited < total; ++visited) {
    const c_value_type element = tensor.Value<ValueType>(position);
    if (ARROW_PREDICT_FALSE(element != static_cast<c_value_type>(0))) {
      *values++ = element;
      for (const int64_t axis_index : position) {
        *indices++ = static_cast<c_index_type>(axis_index);
      }
    }

    IncrementRowMajorIndex(position, dims);
  }
}

// Call the conversion function template `func` instantiated with
// <index_type, value_type>.
//
// The first argument passed is `tensor_`, which must be visible at the point
// of expansion. `indices` and `values` are reinterpret_cast to index_type*
// and value_type* respectively; `size` is forwarded unchanged.
#define CONVERT_TENSOR(func, index_type, value_type, indices, values, size) \
func<index_type, value_type>(tensor_, reinterpret_cast<index_type*>(indices), \
reinterpret_cast<value_type*>(values), size)

// Per-layout adapters around CONVERT_TENSOR. Each takes
// (index_type, value_type, ...) -- the argument shape with which DISPATCH
// invokes its ACTION -- and forwards the variadic tail (indices, values, size)
// to CONVERT_TENSOR together with the matching Convert*Tensor function.
//
// Using ARROW_EXPAND is necessary to expand __VA_ARGS__ correctly on VC++.
#define CONVERT_ROW_MAJOR_TENSOR(index_type, value_type, ...) \
ARROW_EXPAND(CONVERT_TENSOR(ConvertRowMajorTensor, index_type, value_type, __VA_ARGS__))

#define CONVERT_COLUMN_MAJOR_TENSOR(index_type, value_type, ...) \
ARROW_EXPAND( \
CONVERT_TENSOR(ConvertColumnMajorTensor, index_type, value_type, __VA_ARGS__))

#define CONVERT_STRIDED_TENSOR(index_type, value_type, ...) \
ARROW_EXPAND(CONVERT_TENSOR(ConvertStridedTensor, index_type, value_type, __VA_ARGS__))

// ----------------------------------------------------------------------
// SparseTensorConverter for SparseCOOIndex

Expand Down Expand Up @@ -92,26 +199,15 @@ class SparseCOOTensorConverter : private SparseTensorConverterMixin {
}
tensor_data += value_elsize;
}
} else if (tensor_.is_row_major()) {
DISPATCH(CONVERT_ROW_MAJOR_TENSOR, index_elsize, value_elsize, indices, values,
nonzero_count);
} else if (tensor_.is_column_major()) {
DISPATCH(CONVERT_COLUMN_MAJOR_TENSOR, index_elsize, value_elsize, indices, values,
nonzero_count);
} else {
const std::vector<int64_t>& shape = tensor_.shape();
std::vector<int64_t> coord(ndim, 0); // The current logical coordinates

for (int64_t n = tensor_.size(); n > 0; n--) {
int64_t offset = tensor_.CalculateValueOffset(coord);
if (std::any_of(tensor_data + offset, tensor_data + offset + value_elsize,
IsNonZero)) {
std::copy_n(tensor_data + offset, value_elsize, values);
values += value_elsize;

// Write indices in row-major order.
for (int64_t i = 0; i < ndim; ++i) {
AssignIndex(indices, coord[i], index_elsize);
indices += index_elsize;
}
}

IncrementIndex(coord, shape);
}
DISPATCH(CONVERT_STRIDED_TENSOR, index_elsize, value_elsize, indices, values,
nonzero_count);
}

// make results
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/tensor/csx_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSXMatrix(

const auto nc = shape[1];

int64_t offset;
int64_t offset = 0;
for (int64_t i = 0; i < indptr->size() - 1; ++i) {
const auto start =
SparseTensorConverterMixin::GetIndexValue(indptr_data, indptr_elsize);
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/util/macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <cstdint>

// Identity macro: expands to its argument unchanged. Wrapping another macro
// invocation in it gives the preprocessor an additional expansion pass.
#define ARROW_EXPAND(x) x
// Turn the argument into a string literal using the `#` operator.
#define ARROW_STRINGIFY(x) #x
// Paste the two arguments into a single token using the `##` operator.
#define ARROW_CONCAT(x, y) x##y

Expand Down