Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ Status ListArray::FromArrays(const Array& offsets, const Array& values,

void ListArray::SetData(const std::shared_ptr<ArrayData>& data) {
this->Array::SetData(data);
DCHECK_EQ(data->buffers.size(), 2);

auto value_offsets = data->buffers[1];
raw_value_offsets_ = value_offsets == nullptr
? nullptr
Expand All @@ -246,6 +248,7 @@ BinaryArray::BinaryArray(const std::shared_ptr<ArrayData>& data) {
}

void BinaryArray::SetData(const std::shared_ptr<ArrayData>& data) {
DCHECK_EQ(data->buffers.size(), 3);
auto value_offsets = data->buffers[1];
auto value_data = data->buffers[2];
this->Array::SetData(data);
Expand Down Expand Up @@ -342,6 +345,7 @@ std::shared_ptr<Array> StructArray::field(int i) const {
if (!boxed_fields_[i]) {
DCHECK(MakeArray(data_->child_data[i], &boxed_fields_[i]).ok());
}
DCHECK(boxed_fields_[i]);
return boxed_fields_[i];
}

Expand All @@ -351,6 +355,8 @@ std::shared_ptr<Array> StructArray::field(int i) const {
void UnionArray::SetData(const std::shared_ptr<ArrayData>& data) {
this->Array::SetData(data);

DCHECK_EQ(data->buffers.size(), 3);

auto type_ids = data_->buffers[1];
auto value_offsets = data_->buffers[2];
raw_type_ids_ =
Expand Down Expand Up @@ -385,6 +391,7 @@ std::shared_ptr<Array> UnionArray::child(int i) const {
if (!boxed_fields_[i]) {
DCHECK(MakeArray(data_->child_data[i], &boxed_fields_[i]).ok());
}
DCHECK(boxed_fields_[i]);
return boxed_fields_[i];
}

Expand Down Expand Up @@ -594,10 +601,11 @@ class ArrayDataWrapper {

} // namespace internal

// Remove enclosing namespace after 0.7.0
Status MakeArray(const std::shared_ptr<ArrayData>& data, std::shared_ptr<Array>* out) {
internal::ArrayDataWrapper wrapper_visitor(data, out);
return VisitTypeInline(*data->type, &wrapper_visitor);
RETURN_NOT_OK(VisitTypeInline(*data->type, &wrapper_visitor));
DCHECK(out);
return Status::OK();
}

#ifndef ARROW_NO_DEPRECATED_API
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ struct Decimal;
/// input array and replace them with newly-allocated data, changing the output
/// data type as well.
struct ARROW_EXPORT ArrayData {
ArrayData() {}
ArrayData() : length(0) {}

ArrayData(const std::shared_ptr<DataType>& type, int64_t length,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ class ARROW_EXPORT BufferBuilder {
if (elements == 0) {
return Status::OK();
}
if (capacity_ == 0) {
if (buffer_ == nullptr) {
buffer_ = std::make_shared<PoolBuffer>(pool_);
}
int64_t old_capacity = capacity_;
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/io/hdfs-internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include <boost/filesystem.hpp> // NOLINT

#include "arrow/status.h"
#include "arrow/util/logging.h"

namespace fs = boost::filesystem;

Expand Down Expand Up @@ -346,6 +347,7 @@ bool LibHdfsShim::HasPread() {
tSize LibHdfsShim::Pread(hdfsFS fs, hdfsFile file, tOffset position, void* buffer,
tSize length) {
GET_SYMBOL(this, hdfsPread);
DCHECK(this->hdfsPread);
return this->hdfsPread(fs, file, position, buffer, length);
}

Expand Down
10 changes: 10 additions & 0 deletions cpp/src/arrow/ipc/message.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "arrow/ipc/Schema_generated.h"
#include "arrow/ipc/metadata-internal.h"
#include "arrow/status.h"
#include "arrow/util/logging.h"

namespace arrow {
namespace ipc {
Expand Down Expand Up @@ -194,9 +195,18 @@ std::string FormatMessageType(Message::Type type) {

Status ReadMessage(int64_t offset, int32_t metadata_length, io::RandomAccessFile* file,
std::unique_ptr<Message>* message) {
DCHECK_GT(static_cast<size_t>(metadata_length), sizeof(int32_t));

std::shared_ptr<Buffer> buffer;
RETURN_NOT_OK(file->ReadAt(offset, metadata_length, &buffer));

if (buffer->size() < metadata_length) {
std::stringstream ss;
ss << "Expected to read " << metadata_length << " metadata bytes but got "
<< buffer->size();
return Status::Invalid(ss.str());
}

int32_t flatbuffer_size = *reinterpret_cast<const int32_t*>(buffer->data());

if (flatbuffer_size + static_cast<int>(sizeof(int32_t)) > metadata_length) {
Expand Down
8 changes: 7 additions & 1 deletion cpp/src/arrow/ipc/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,6 @@ Status ReadDictionary(const Buffer& metadata, const DictionaryTypeMap& dictionar
reinterpret_cast<const flatbuf::RecordBatch*>(dictionary_batch->data());
RETURN_NOT_OK(
ReadRecordBatch(batch_meta, dummy_schema, kMaxNestingDepth, file, &batch));

if (batch->num_columns() != 1) {
return Status::Invalid("Dictionary record batch must only contain one field");
}
Expand Down Expand Up @@ -526,6 +525,13 @@ class RecordBatchFileReader::RecordBatchFileReaderImpl {
int file_end_size = static_cast<int>(magic_size + sizeof(int32_t));
RETURN_NOT_OK(file_->ReadAt(footer_offset_ - file_end_size, file_end_size, &buffer));

const int64_t expected_footer_size = magic_size + sizeof(int32_t);
if (buffer->size() < expected_footer_size) {
std::stringstream ss;
ss << "Unable to read " << expected_footer_size << "from end of file";
return Status::Invalid(ss.str());
}

if (memcmp(buffer->data() + sizeof(int32_t), kArrowMagicBytes, magic_size)) {
return Status::Invalid("Not an Arrow file");
}
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/memory_pool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,9 @@ Status DefaultMemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t
// Note: We cannot use realloc() here as it doesn't guarantee alignment.

// Allocate new chunk
uint8_t* out;
uint8_t* out = nullptr;
RETURN_NOT_OK(AllocateAligned(new_size, &out));
DCHECK(out);
// Copy contents and release old memory chunk
memcpy(out, *ptr, static_cast<size_t>(std::min(new_size, old_size)));
#ifdef _MSC_VER
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ endif()

set(ARROW_PYTHON_MIN_TEST_LIBS
arrow_python_test_main
arrow_python_shared
arrow_python_static
arrow_shared)

set(ARROW_PYTHON_TEST_LINK_LIBS ${ARROW_PYTHON_MIN_TEST_LIBS})
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/arrow/python/arrow_to_pandas.cc
Original file line number Diff line number Diff line change
Expand Up @@ -615,8 +615,8 @@ static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data,
PyAcquireGIL lock;
OwnedRef decimal_ref;
OwnedRef Decimal_ref;
RETURN_NOT_OK(ImportModule("decimal", &decimal_ref));
RETURN_NOT_OK(ImportFromModule(decimal_ref, "Decimal", &Decimal_ref));
RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_ref));
RETURN_NOT_OK(internal::ImportFromModule(decimal_ref, "Decimal", &Decimal_ref));
PyObject* Decimal = Decimal_ref.obj();

for (int c = 0; c < data.num_chunks(); c++) {
Expand All @@ -633,7 +633,8 @@ static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data,
const uint8_t* raw_value = arr->GetValue(i);
std::string decimal_string;
RETURN_NOT_OK(RawDecimalToString(raw_value, precision, scale, &decimal_string));
RETURN_NOT_OK(DecimalFromString(Decimal, decimal_string, out_values++));
*out_values++ = internal::DecimalFromString(Decimal, decimal_string);
RETURN_IF_PYERROR();
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/python/arrow_to_python.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ Status DeserializeDict(PyObject* context, const Array& array, int64_t start_idx,
const auto& data = static_cast<const StructArray&>(array);
ScopedRef keys, vals;
ScopedRef result(PyDict_New());
RETURN_IF_PYERROR();

DCHECK_EQ(2, data.num_fields());

RETURN_NOT_OK(DeserializeList(context, *data.field(0), start_idx, stop_idx, base,
tensors, keys.ref()));
RETURN_NOT_OK(DeserializeList(context, *data.field(1), start_idx, stop_idx, base,
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/python/builtin_convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ class DecimalConverter
inline Status AppendItem(const OwnedRef& item) {
/// TODO(phillipc): Check for nan?
std::string string;
RETURN_NOT_OK(PythonDecimalToString(item.obj(), &string));
RETURN_NOT_OK(internal::PythonDecimalToString(item.obj(), &string));

Decimal128 value;
RETURN_NOT_OK(Decimal128::FromString(string, &value));
Expand Down
12 changes: 6 additions & 6 deletions cpp/src/arrow/python/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
}
}

namespace internal {

Status ImportModule(const std::string& module_name, OwnedRef* ref) {
PyObject* module = PyImport_ImportModule(module_name.c_str());
RETURN_IF_PYERROR();
Expand Down Expand Up @@ -106,22 +108,20 @@ Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int* precision,
return Decimal128::FromString(c_string, nullptr, precision, scale);
}

Status DecimalFromString(PyObject* decimal_constructor, const std::string& decimal_string,
PyObject** out) {
PyObject* DecimalFromString(PyObject* decimal_constructor,
const std::string& decimal_string) {
DCHECK_NE(decimal_constructor, nullptr);
DCHECK_NE(out, nullptr);

auto string_size = decimal_string.size();
DCHECK_GT(string_size, 0);

auto string_bytes = decimal_string.c_str();
DCHECK_NE(string_bytes, nullptr);

*out = PyObject_CallFunction(decimal_constructor, const_cast<char*>("s#"), string_bytes,
return PyObject_CallFunction(decimal_constructor, const_cast<char*>("s#"), string_bytes,
string_size);
RETURN_IF_PYERROR();
return Status::OK();
}

} // namespace internal
} // namespace py
} // namespace arrow
24 changes: 13 additions & 11 deletions cpp/src/arrow/python/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,28 @@
#include "arrow/util/visibility.h"

namespace arrow {

namespace py {

class OwnedRef;

ARROW_EXPORT std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
ARROW_EXPORT
std::shared_ptr<DataType> GetPrimitiveType(Type::type type);

namespace internal {

Status ARROW_EXPORT ImportModule(const std::string& module_name, OwnedRef* ref);
Status ARROW_EXPORT ImportFromModule(const OwnedRef& module,
const std::string& module_name, OwnedRef* ref);
Status ImportModule(const std::string& module_name, OwnedRef* ref);
Status ImportFromModule(const OwnedRef& module, const std::string& module_name,
OwnedRef* ref);

Status ARROW_EXPORT PythonDecimalToString(PyObject* python_decimal, std::string* out);
Status PythonDecimalToString(PyObject* python_decimal, std::string* out);

Status ARROW_EXPORT InferDecimalPrecisionAndScale(PyObject* python_decimal,
int* precision = nullptr,
int* scale = nullptr);
Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int* precision = nullptr,
int* scale = nullptr);

Status ARROW_EXPORT DecimalFromString(PyObject* decimal_constructor,
const std::string& decimal_string, PyObject** out);
PyObject* DecimalFromString(PyObject* decimal_constructor,
const std::string& decimal_string);

} // namespace internal
} // namespace py
} // namespace arrow

Expand Down
25 changes: 13 additions & 12 deletions cpp/src/arrow/python/numpy_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -652,16 +652,16 @@ Status NumPyConverter::ConvertDecimals() {
// Import the decimal module and Decimal class
OwnedRef decimal;
OwnedRef Decimal;
RETURN_NOT_OK(ImportModule("decimal", &decimal));
RETURN_NOT_OK(ImportFromModule(decimal, "Decimal", &Decimal));
RETURN_NOT_OK(internal::ImportModule("decimal", &decimal));
RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal));

Ndarray1DIndexer<PyObject*> objects(arr_);
PyObject* object = objects[0];

int precision;
int scale;

RETURN_NOT_OK(InferDecimalPrecisionAndScale(object, &precision, &scale));
RETURN_NOT_OK(internal::InferDecimalPrecisionAndScale(object, &precision, &scale));

type_ = std::make_shared<DecimalType>(precision, scale);

Expand All @@ -672,7 +672,7 @@ Status NumPyConverter::ConvertDecimals() {
object = objects[i];
if (PyObject_IsInstance(object, Decimal.obj())) {
std::string string;
RETURN_NOT_OK(PythonDecimalToString(object, &string));
RETURN_NOT_OK(internal::PythonDecimalToString(object, &string));

Decimal128 value;
RETURN_NOT_OK(Decimal128::FromString(string, &value));
Expand Down Expand Up @@ -823,7 +823,7 @@ Status NumPyConverter::ConvertObjectFixedWidthBytes(
const std::shared_ptr<DataType>& type) {
PyAcquireGIL lock;

int32_t byte_width = static_cast<const FixedSizeBinaryType&>(*type).byte_width();
const int32_t byte_width = static_cast<const FixedSizeBinaryType&>(*type).byte_width();

// The output type at this point is inconclusive because there may be bytes
// and unicode mixed in the object array
Expand Down Expand Up @@ -893,8 +893,8 @@ Status NumPyConverter::ConvertObjectsInfer() {

OwnedRef decimal;
OwnedRef Decimal;
RETURN_NOT_OK(ImportModule("decimal", &decimal));
RETURN_NOT_OK(ImportFromModule(decimal, "Decimal", &Decimal));
RETURN_NOT_OK(internal::ImportModule("decimal", &decimal));
RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal));

for (int64_t i = 0; i < length_; ++i) {
PyObject* obj = objects[i];
Expand Down Expand Up @@ -935,7 +935,7 @@ Status NumPyConverter::ConvertObjectsInfer() {
Status NumPyConverter::ConvertObjectsInferAndCast() {
size_t position = out_arrays_.size();
RETURN_NOT_OK(ConvertObjectsInfer());

DCHECK_EQ(position + 1, out_arrays_.size());
std::shared_ptr<Array> arr = out_arrays_[position];

// Perform cast
Expand Down Expand Up @@ -1182,10 +1182,10 @@ Status NumPyConverter::ConvertLists(const std::shared_ptr<DataType>& type,
LIST_CASE(DOUBLE, NPY_DOUBLE, DoubleType)
LIST_CASE(STRING, NPY_OBJECT, StringType)
case Type::LIST: {
const ListType& list_type = static_cast<const ListType&>(*type);
const auto& list_type = static_cast<const ListType&>(*type);
auto value_builder = static_cast<ListBuilder*>(builder->value_builder());

auto foreach_item = [&](PyObject* object) {
auto foreach_item = [this, &builder, &value_builder, &list_type](PyObject* object) {
if (PandasObjectIsNull(object)) {
return builder->AppendNull();
} else {
Expand Down Expand Up @@ -1219,8 +1219,9 @@ Status NdarrayToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo,
std::shared_ptr<ChunkedArray>* out) {
NumPyConverter converter(pool, ao, mo, type, use_pandas_null_sentinels);
RETURN_NOT_OK(converter.Convert());
DCHECK(converter.result()[0]);
*out = std::make_shared<ChunkedArray>(converter.result());
const auto& output_arrays = converter.result();
DCHECK_GT(output_arrays.size(), 0);
*out = std::make_shared<ChunkedArray>(output_arrays);
return Status::OK();
}

Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/python/python-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ TEST(DecimalTest, TestPythonDecimalToString) {

OwnedRef decimal;
OwnedRef Decimal;
ASSERT_OK(ImportModule("decimal", &decimal));
ASSERT_OK(internal::ImportModule("decimal", &decimal));
ASSERT_NE(decimal.obj(), nullptr);

ASSERT_OK(ImportFromModule(decimal, "Decimal", &Decimal));
ASSERT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal));
ASSERT_NE(Decimal.obj(), nullptr);

std::string decimal_string("-39402950693754869342983");
Expand All @@ -61,7 +61,7 @@ TEST(DecimalTest, TestPythonDecimalToString) {
ASSERT_NE(python_object, nullptr);

std::string string_result;
ASSERT_OK(PythonDecimalToString(python_object, &string_result));
ASSERT_OK(internal::PythonDecimalToString(python_object, &string_result));
}

TEST(PandasConversionTest, TestObjectBlockWriteFails) {
Expand Down
4 changes: 0 additions & 4 deletions cpp/src/arrow/table-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,6 @@ TEST_F(TestColumn, BasicAPI) {
ASSERT_EQ(300, column_->length());
ASSERT_EQ(30, column_->null_count());
ASSERT_EQ(3, column_->data()->num_chunks());

// nullptr array should not break
column_.reset(new Column(f0, std::shared_ptr<Array>(nullptr)));
ASSERT_NE(column_.get(), nullptr);
}

TEST_F(TestColumn, ChunksInhomogeneous) {
Expand Down
Loading