diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc index ec6da9c3e56..9f77fbb5f43 100644 --- a/cpp/src/arrow/array/concatenate.cc +++ b/cpp/src/arrow/array/concatenate.cc @@ -39,7 +39,7 @@ #include "arrow/util/bitmap_ops.h" #include "arrow/util/checked_cast.h" #include "arrow/util/int_util.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/visit_type_inline.h" diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 5a214473972..9dfc76c01ce 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -29,9 +29,9 @@ #include "arrow/status.h" #include "arrow/type.h" #include "arrow/util/bitmap_ops.h" -#include "arrow/util/int_util_internal.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" +#include "arrow/util/slice_util_internal.h" namespace arrow { diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc index 158d7a11858..96f8cdef07a 100644 --- a/cpp/src/arrow/array/validate.cc +++ b/cpp/src/arrow/array/validate.cc @@ -28,7 +28,7 @@ #include "arrow/util/bitmap_ops.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/util/utf8.h" #include "arrow/visit_data_inline.h" diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc index 5d9faeb7ed8..e7566354d12 100644 --- a/cpp/src/arrow/buffer.cc +++ b/cpp/src/arrow/buffer.cc @@ -24,8 +24,8 @@ #include "arrow/result.h" #include "arrow/status.h" #include "arrow/util/bit_util.h" -#include "arrow/util/int_util_internal.h" #include "arrow/util/logging.h" +#include "arrow/util/slice_util_internal.h" #include "arrow/util/string.h" namespace arrow { diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc index d96cdad86dc..faae139c130 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc @@ -38,7 +38,7 @@ #include "arrow/type_traits.h" #include "arrow/util/bitmap_reader.h" #include "arrow/util/checked_cast.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" diff --git a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h index 1707ed7c137..f416881ccb8 100644 --- a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h +++ b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h @@ -23,7 +23,7 @@ #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/decimal.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/macros.h" namespace arrow { diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc index d9ffcda5962..5b92dac2375 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc @@ -49,7 +49,7 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/cpu_info.h" #include "arrow/util/int128_internal.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/make_unique.h" #include "arrow/util/task_group.h" #include "arrow/util/tdigest.h" diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc index 35b4592e198..932bda48912 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc @@ -51,7 +51,7 @@ #include "arrow/util/async_generator.h" #include "arrow/util/bitmap_reader.h" #include "arrow/util/checked_cast.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" #include "arrow/util/thread_pool.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc index 0742fb32c52..3138fb6974c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc @@ -31,7 +31,7 @@ #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/decimal.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/macros.h" #include "arrow/visit_scalar_inline.h" diff --git a/cpp/src/arrow/dataset/partition.cc b/cpp/src/arrow/dataset/partition.cc index 6b4d601db01..89d90c758fc 100644 --- a/cpp/src/arrow/dataset/partition.cc +++ b/cpp/src/arrow/dataset/partition.cc @@ -34,7 +34,7 @@ #include "arrow/dataset/dataset_internal.h" #include "arrow/filesystem/path_util.h" #include "arrow/scalar.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/util/make_unique.h" #include "arrow/util/string_view.h" diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 2fab6f38255..9130b297485 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -36,7 +36,7 @@ #include "arrow/status.h" #include "arrow/util/bit_util.h" #include "arrow/util/debug.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" // IWYU pragma: keep #include "arrow/util/optional.h" diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index 01d7f212908..25392a069ca 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -38,7 +38,7 @@ #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/formatting.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/string.h" #include "arrow/util/string_view.h" diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc index e72e127da8b..27d2ad184db 100644 --- a/cpp/src/arrow/python/python_to_arrow.cc +++ b/cpp/src/arrow/python/python_to_arrow.cc @@ -41,7 +41,7 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/converter.h" #include "arrow/util/decimal.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/python/datetime.h" diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index dd99370c979..f20a057b1e6 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -31,7 +31,7 @@ #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/visit_type_inline.h" diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc index 116f6d1f0cd..b8f218ede98 100644 --- a/cpp/src/arrow/util/basic_decimal.cc +++ b/cpp/src/arrow/util/basic_decimal.cc @@ -31,7 +31,7 @@ #include "arrow/util/config.h" // for ARROW_USE_NATIVE_INT128 #include "arrow/util/endian.h" #include "arrow/util/int128_internal.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index d35223a1f4a..7bda91cf100 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -33,7 +33,7 @@ #include "arrow/util/endian.h" #include "arrow/util/formatting.h" #include "arrow/util/int128_internal.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" #include "arrow/util/value_parsing.h" diff --git a/cpp/src/arrow/util/int_util.h b/cpp/src/arrow/util/int_util.h index e8728ccacac..e631f4dc1e7 100644 --- a/cpp/src/arrow/util/int_util.h +++ b/cpp/src/arrow/util/int_util.h @@ -21,6 +21,7 @@ #include #include "arrow/status.h" + #include "arrow/util/visibility.h" namespace arrow { @@ -113,5 +114,21 @@ Status CheckIntegersInRange(const Datum& datum, const Scalar& bound_lower, ARROW_EXPORT Status IntegersCanFit(const Datum& datum, const DataType& target_type); +/// Upcast an integer to the largest possible width (currently 64 bits) + +template +typename std::enable_if< + std::is_integral::value && std::is_signed::value, int64_t>::type +UpcastInt(Integer v) { + return v; +} + +template +typename std::enable_if< + std::is_integral::value && std::is_unsigned::value, uint64_t>::type +UpcastInt(Integer v) { + return v; +} + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/int_util_internal.h b/cpp/src/arrow/util/int_util_overflow.h similarity index 79% rename from cpp/src/arrow/util/int_util_internal.h rename to cpp/src/arrow/util/int_util_overflow.h index 8a19876f2ba..de58d116ae6 100644 --- a/cpp/src/arrow/util/int_util_internal.h +++ b/cpp/src/arrow/util/int_util_overflow.h @@ -114,41 +114,5 @@ SignedInt SafeLeftShift(SignedInt u, Shift shift) { return static_cast(static_cast(u) << shift); } -/// Upcast an integer to the largest possible width (currently 64 bits) - -template -typename std::enable_if< - std::is_integral::value && std::is_signed::value, int64_t>::type -UpcastInt(Integer v) { - return v; -} - -template -typename std::enable_if< - std::is_integral::value && std::is_unsigned::value, uint64_t>::type -UpcastInt(Integer v) { - return v; -} - -static inline Status CheckSliceParams(int64_t object_length, int64_t slice_offset, - int64_t slice_length, const char* object_name) { - if (ARROW_PREDICT_FALSE(slice_offset < 0)) { - return Status::IndexError("Negative ", object_name, " slice offset"); - } - if (ARROW_PREDICT_FALSE(slice_length < 0)) { - return Status::IndexError("Negative ", object_name, " slice length"); - } - int64_t offset_plus_length; - if (ARROW_PREDICT_FALSE( - internal::AddWithOverflow(slice_offset, slice_length, &offset_plus_length))) { - return Status::IndexError(object_name, " slice would overflow"); - } - if (ARROW_PREDICT_FALSE(offset_plus_length > object_length)) { - return Status::IndexError(object_name, " slice would exceed ", object_name, - " length"); - } - return Status::OK(); -} - } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/int_util_test.cc b/cpp/src/arrow/util/int_util_test.cc index fc6f069242a..d20640060bb 100644 --- a/cpp/src/arrow/util/int_util_test.cc +++ b/cpp/src/arrow/util/int_util_test.cc @@ -29,7 +29,7 @@ #include "arrow/testing/random.h" #include "arrow/type.h" #include "arrow/util/int_util.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" namespace arrow { namespace internal { diff --git a/cpp/src/arrow/util/slice_util_internal.h b/cpp/src/arrow/util/slice_util_internal.h new file mode 100644 index 00000000000..8b8d3d18367 --- /dev/null +++ b/cpp/src/arrow/util/slice_util_internal.h @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/util/int_util_overflow.h" + +namespace arrow { +namespace internal { + +static inline Status CheckSliceParams(int64_t object_length, int64_t slice_offset, + int64_t slice_length, const char* object_name) { + if (ARROW_PREDICT_FALSE(slice_offset < 0)) { + return Status::IndexError("Negative ", object_name, " slice offset"); + } + if (ARROW_PREDICT_FALSE(slice_length < 0)) { + return Status::IndexError("Negative ", object_name, " slice length"); + } + int64_t offset_plus_length; + if (ARROW_PREDICT_FALSE( + internal::AddWithOverflow(slice_offset, slice_length, &offset_plus_length))) { + return Status::IndexError(object_name, " slice would overflow"); + } + if (ARROW_PREDICT_FALSE(offset_plus_length > object_length)) { + return Status::IndexError(object_name, " slice would exceed ", object_name, + " length"); + } + return Status::OK(); +} + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/arrow/vendored/CMakeLists.txt b/cpp/src/arrow/vendored/CMakeLists.txt index 0fdabc49f7c..3be026aeaa6 100644 --- a/cpp/src/arrow/vendored/CMakeLists.txt +++ b/cpp/src/arrow/vendored/CMakeLists.txt @@ -20,3 +20,5 @@ arrow_install_all_headers("arrow/vendored") add_subdirectory(datetime) add_subdirectory(double-conversion) add_subdirectory(pcg) +add_subdirectory(portable-snippets) +add_subdirectory(xxhash) diff --git a/cpp/src/arrow/vendored/portable-snippets/CMakeLists.txt b/cpp/src/arrow/vendored/portable-snippets/CMakeLists.txt new file mode 100644 index 00000000000..432b0d2dcbf --- /dev/null +++ b/cpp/src/arrow/vendored/portable-snippets/CMakeLists.txt @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +arrow_install_all_headers("arrow/vendored/portable-snippets") diff --git a/cpp/src/arrow/vendored/xxhash/CMakeLists.txt b/cpp/src/arrow/vendored/xxhash/CMakeLists.txt new file mode 100644 index 00000000000..25d2fa45980 --- /dev/null +++ b/cpp/src/arrow/vendored/xxhash/CMakeLists.txt @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +arrow_install_all_headers("arrow/vendored/xxhash") diff --git a/cpp/src/parquet/arrow/reader_internal.cc b/cpp/src/parquet/arrow/reader_internal.cc index d8aad0d0f2f..64fcc451808 100644 --- a/cpp/src/parquet/arrow/reader_internal.cc +++ b/cpp/src/parquet/arrow/reader_internal.cc @@ -41,7 +41,7 @@ #include "arrow/util/bit_util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/endian.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/util/string_view.h" #include "arrow/util/ubsan.h" diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 76476c5da73..1f25268b84b 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -38,7 +38,7 @@ #include "arrow/util/bit_util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/compression.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/util/rle_encoding.h" #include "parquet/column_page.h" diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index af0e543c3e7..6af7af50cba 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -39,7 +39,7 @@ #include "arrow/util/byte_stream_split.h" #include "arrow/util/checked_cast.h" #include "arrow/util/hashing.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/util/rle_encoding.h" #include "arrow/util/string_view.h" diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index 3f4c2cb76a8..1f45f03c72d 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -30,7 +30,7 @@ #include "arrow/io/memory.h" #include "arrow/util/checked_cast.h" #include "arrow/util/future.h" -#include "arrow/util/int_util_internal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/util/ubsan.h" #include "parquet/column_reader.h"