Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/cpp-msvc-build-main.bat
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
-DARROW_FLIGHT=%ARROW_BUILD_FLIGHT% ^
-DARROW_GANDIVA=%ARROW_BUILD_GANDIVA% ^
-DARROW_DATASET=ON ^
-DARROW_ENGINE=ON ^
-DARROW_S3=%ARROW_S3% ^
-DARROW_MIMALLOC=ON ^
-DARROW_PARQUET=ON ^
Expand Down
1 change: 1 addition & 0 deletions ci/docker/conda-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ ENTRYPOINT [ "/bin/bash", "-c", "-l" ]
ENV ARROW_BUILD_TESTS=ON \
ARROW_DEPENDENCY_SOURCE=CONDA \
ARROW_DATASET=ON \
ARROW_ENGINE=ON \
ARROW_FLIGHT=ON \
ARROW_GANDIVA=ON \
ARROW_HOME=$CONDA_PREFIX \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/conda-integration.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ ENV ARROW_BUILD_INTEGRATION=ON \
ARROW_FLIGHT=ON \
ARROW_ORC=OFF \
ARROW_DATASET=OFF \
ARROW_ENGINE=OFF \
ARROW_GANDIVA=OFF \
ARROW_PLASMA=OFF \
ARROW_FILESYSTEM=OFF \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/cuda-10.0-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ ENV ARROW_BUILD_STATIC=OFF \
ARROW_CSV=OFF \
ARROW_CUDA=ON \
ARROW_DATASET=OFF \
ARROW_ENGINE=OFF \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
ARROW_FILESYSTEM=OFF \
ARROW_FLIGHT=OFF \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/cuda-10.1-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ ENV ARROW_BUILD_STATIC=OFF \
ARROW_CSV=OFF \
ARROW_CUDA=ON \
ARROW_DATASET=OFF \
ARROW_ENGINE=OFF \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
ARROW_FILESYSTEM=OFF \
ARROW_FLIGHT=OFF \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/cuda-9.1-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ ENV ARROW_BUILD_STATIC=OFF \
ARROW_CSV=OFF \
ARROW_CUDA=ON \
ARROW_DATASET=OFF \
ARROW_ENGINE=OFF \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
ARROW_FILESYSTEM=OFF \
ARROW_FLIGHT=OFF \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/debian-10-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
ENV ARROW_BUILD_TESTS=ON \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
ARROW_DATASET=ON \
ARROW_ENGINE=ON \
ARROW_FLIGHT=ON \
ARROW_GANDIVA=ON \
ARROW_HOME=/usr/local \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/fedora-29-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ RUN dnf update -y && \
ENV ARROW_BUILD_TESTS=ON \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
ARROW_DATASET=ON \
ARROW_ENGINE=ON \
ARROW_FLIGHT=ON \
ARROW_GANDIVA_JAVA=ON \
ARROW_GANDIVA=OFF \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/ubuntu-14.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ RUN apt-get update -y -q && \
ENV ARROW_BUILD_TESTS=ON \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
ARROW_DATASET=ON \
ARROW_ENGINE=ON \
ARROW_FLIGHT=OFF \
ARROW_GANDIVA_JAVA=OFF \
ARROW_GANDIVA=OFF \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/ubuntu-16.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ RUN apt-get update -y -q && \
ENV ARROW_BUILD_BENCHMARKS=OFF \
ARROW_BUILD_TESTS=ON \
ARROW_DATASET=ON \
ARROW_ENGINE=ON \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
ARROW_GANDIVA_JAVA=OFF \
ARROW_GANDIVA=ON \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/ubuntu-18.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ RUN apt-get update -y -q && \
ENV ARROW_BUILD_TESTS=ON \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
ARROW_DATASET=ON \
ARROW_ENGINE=ON \
ARROW_FLIGHT=OFF \
ARROW_GANDIVA=ON \
ARROW_HDFS=ON \
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/PKGBUILD
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ build() {
-DARROW_COMPUTE=ON \
-DARROW_CSV=ON \
-DARROW_DATASET=ON \
-DARROW_ENGINE=ON \
-DARROW_FILESYSTEM=ON \
-DARROW_HDFS=OFF \
-DARROW_JEMALLOC=OFF \
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/cpp_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ cmake -G "${CMAKE_GENERATOR:-Ninja}" \
-DARROW_CUDA=${ARROW_CUDA:-OFF} \
-DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \
-DARROW_DATASET=${ARROW_DATASET:-ON} \
-DARROW_ENGINE=${ARROW_ENGINE:-ON} \
-DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
-DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \
-DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \
Expand Down
5 changes: 5 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,12 @@ if(ARROW_CUDA OR ARROW_FLIGHT OR ARROW_PARQUET OR ARROW_BUILD_TESTS)
set(ARROW_IPC ON)
endif()

# Enabling the engine forces the dataset module on. This check must come
# before the ARROW_DATASET check below so that the options ARROW_DATASET
# switches on are enabled for engine builds as well.
if(ARROW_ENGINE)
set(ARROW_DATASET ON)
endif()

# Enabling the dataset module forces Parquet and the filesystem layer on.
if(ARROW_DATASET)
set(ARROW_PARQUET ON)
set(ARROW_FILESYSTEM ON)
endif()

Expand Down
2 changes: 2 additions & 0 deletions cpp/cmake_modules/DefineOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")

define_option(ARROW_DATASET "Build the Arrow Dataset Modules" OFF)

define_option(ARROW_ENGINE "Build the Arrow Query Engine Modules" OFF)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't want to bikeshed about this but ARROW_QUERY_ENGINE or something more "obvious" might aid readability, unless we come up with some other name for the project


define_option(ARROW_FILESYSTEM "Build the Arrow Filesystem Layer" OFF)

define_option(ARROW_FLIGHT
Expand Down
98 changes: 98 additions & 0 deletions cpp/cmake_modules/FindArrowEngine.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# - Find Arrow Query Engine (arrow/engine/api.h, libarrow_engine.a, libarrow_engine.so)
#
# This module requires Arrow, from which it uses
# arrow_find_package(), and Arrow Dataset (ARROW_DATASET_FOUND must be set)
#
# This module defines
# ARROW_ENGINE_FOUND, whether Arrow Query Engine has been found
# ARROW_ENGINE_IMPORT_LIB,
# path to libarrow_engine's import library (Windows only)
# ARROW_ENGINE_INCLUDE_DIR, directory containing headers
# ARROW_ENGINE_LIB_DIR, directory containing Arrow Query Engine libraries
# ARROW_ENGINE_SHARED_LIB, path to libarrow_engine's shared library
# ARROW_ENGINE_STATIC_LIB, path to libarrow_engine.a

# Skip the whole search when ARROW_ENGINE_FOUND is already defined, i.e. a
# previous run of this module (or the caller) has already decided the result.
if(DEFINED ARROW_ENGINE_FOUND)
return()
endif()

# Forward the caller's find_package() settings (requested version, REQUIRED,
# QUIET) to the packages this module depends on.
set(find_package_arguments)
if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION)
  list(APPEND find_package_arguments
              "${${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION}")
endif()
if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED)
  list(APPEND find_package_arguments REQUIRED)
endif()
if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY)
  list(APPEND find_package_arguments QUIET)
endif()

# The engine depends on Arrow core and on Arrow Dataset. Look up ArrowDataset
# here (not ArrowEngine, which would re-enter this very module) so that
# ARROW_DATASET_FOUND is available to the check that follows.
find_package(Arrow ${find_package_arguments})
find_package(ArrowDataset ${find_package_arguments})

# Search for the engine library only when both Arrow and Arrow Dataset were
# found.
# NOTE(review): arrow_find_package() is defined outside this file (the header
# says it comes from the Arrow module); presumably it fills in the
# ARROW_ENGINE_* variables listed in the header -- confirm.
if(ARROW_FOUND AND ARROW_DATASET_FOUND)
arrow_find_package(ARROW_ENGINE
"${ARROW_HOME}"
arrow_engine
arrow/engine/api.h
ArrowEngine
arrow-engine)
# Fall back to the Arrow core version when no engine version was set.
if(NOT ARROW_ENGINE_VERSION)
set(ARROW_ENGINE_VERSION "${ARROW_VERSION}")
endif()
endif()

# Record whether the engine's version equals the Arrow core version; the flag
# starts out FALSE and is flipped only on an exact version match.
set(ARROW_ENGINE_VERSION_MATCH FALSE)
if("${ARROW_ENGINE_VERSION}" VERSION_EQUAL "${ARROW_VERSION}")
  set(ARROW_ENGINE_VERSION_MATCH TRUE)
endif()

# Flag the result variables of this module as "advanced" cache entries.
mark_as_advanced(ARROW_ENGINE_IMPORT_LIB
ARROW_ENGINE_INCLUDE_DIR
ARROW_ENGINE_LIBS
ARROW_ENGINE_LIB_DIR
ARROW_ENGINE_SHARED_IMP_LIB
ARROW_ENGINE_SHARED_LIB
ARROW_ENGINE_STATIC_LIB
ARROW_ENGINE_VERSION
ARROW_ENGINE_VERSION_MATCH)

# ArrowEngine counts as found only when the include directory, the library
# directory and the version-match flag are all set/true.
find_package_handle_standard_args(ArrowEngine
REQUIRED_VARS
ARROW_ENGINE_INCLUDE_DIR
ARROW_ENGINE_LIB_DIR
ARROW_ENGINE_VERSION_MATCH
VERSION_VAR
ARROW_ENGINE_VERSION)
# Mirror the result into the upper-case variable tested by the guard at the
# top of this file.
set(ARROW_ENGINE_FOUND ${ArrowEngine_FOUND})

# Print how and where the engine was located, unless the caller asked for a
# quiet search or nothing was found.
if(ArrowEngine_FOUND)
  if(NOT ArrowEngine_FIND_QUIETLY)
    message(STATUS "Found the Arrow Engine by ${ARROW_ENGINE_FIND_APPROACH}")
    message(STATUS "Found the Arrow Engine shared library: ${ARROW_ENGINE_SHARED_LIB}")
    message(STATUS "Found the Arrow Engine import library: ${ARROW_ENGINE_IMPORT_LIB}")
    message(STATUS "Found the Arrow Engine static library: ${ARROW_ENGINE_STATIC_LIB}")
  endif()
endif()
4 changes: 4 additions & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,10 @@ if(ARROW_DATASET)
add_subdirectory(dataset)
endif()

if(ARROW_ENGINE)
add_subdirectory(engine)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does anyone have opinions on the name? Is arrow/query or arrow/query_engine better? I'm more curious about opinions than anything.

Some examples

  • Impala uses exec
  • MapD uses QueryEngine
  • Clickhouse seems to be called "Interpreters"

🤷‍♂

endif()

if(ARROW_FILESYSTEM)
add_subdirectory(filesystem)
endif()
Expand Down
26 changes: 26 additions & 0 deletions cpp/src/arrow/compute/kernels/sum_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,32 @@ struct FindAccumulatorType<I, enable_if_floating_point<I>> {
using Type = DoubleType;
};

// Expands to a `case` label for Type::ID that returns the type singleton of
// the accumulator type FindAccumulatorType selects for TYPE.
#define RETURN_ACCUMULATOR_SINGLETON(ID, TYPE) \
  case Type::ID:                               \
    return TypeTraits<FindAccumulatorType<TYPE>::Type>::type_singleton();

/// \brief Map an input data type to the accumulator type chosen by
/// FindAccumulatorType.
///
/// \param[in] type the input type; its id() selects the mapping
/// \return the accumulator type singleton, or nullptr when `type` is not one
/// of the integer or floating point types handled below
static inline std::shared_ptr<DataType> GetAccumulatorType(
    const std::shared_ptr<DataType>& type) {
  switch (type->id()) {
    // Signed integers
    RETURN_ACCUMULATOR_SINGLETON(INT8, Int8Type)
    RETURN_ACCUMULATOR_SINGLETON(INT16, Int16Type)
    RETURN_ACCUMULATOR_SINGLETON(INT32, Int32Type)
    RETURN_ACCUMULATOR_SINGLETON(INT64, Int64Type)
    // Unsigned integers
    RETURN_ACCUMULATOR_SINGLETON(UINT8, UInt8Type)
    RETURN_ACCUMULATOR_SINGLETON(UINT16, UInt16Type)
    RETURN_ACCUMULATOR_SINGLETON(UINT32, UInt32Type)
    RETURN_ACCUMULATOR_SINGLETON(UINT64, UInt64Type)
    // Floating point
    RETURN_ACCUMULATOR_SINGLETON(FLOAT, FloatType)
    RETURN_ACCUMULATOR_SINGLETON(DOUBLE, DoubleType)
    default:
      break;
  }

  // Any type id not listed above has no accumulator type.
  return nullptr;
}

#undef RETURN_ACCUMULATOR_SINGLETON

template <typename ArrowType, typename StateType>
class SumAggregateFunction final : public AggregateFunctionStaticState<StateType> {
using CType = typename TypeTraits<ArrowType>::CType;
Expand Down
37 changes: 37 additions & 0 deletions cpp/src/arrow/engine/ArrowEngineConfig.cmake.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# This config sets the following variables in your project::
#
# ArrowEngine_FOUND - true if Arrow Query engine is found on the system
#
# This config sets the following targets in your project::
#
# arrow_engine_shared - for linking as a shared library, if the shared library is built
# arrow_engine_static - for linking as a static library, if the static library is built

@PACKAGE_INIT@

# Resolve the packages the engine targets depend on before loading them;
# find_dependency() is provided by CMakeFindDependencyMacro.
include(CMakeFindDependencyMacro)
find_dependency(Arrow)
find_dependency(ArrowDataset)

# Import the exported targets a single time: including the targets file again
# after either engine target exists makes CMake report an "already existent
# target" error.
if(NOT TARGET arrow_engine_shared AND NOT TARGET arrow_engine_static)
  include("${CMAKE_CURRENT_LIST_DIR}/ArrowEngineTargets.cmake")
endif()
Loading