diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index 731b49fa462..50b91b7a656 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -31,8 +31,8 @@ gflags glog gmock>=1.10.0 google-cloud-cpp>=1.34.0 -grpc-cpp<=1.50.1 gtest>=1.10.0 +libgrpc libprotobuf libutf8proc lz4-c diff --git a/cpp/meson.build b/cpp/meson.build index 81143ed1e28..ea7b50ec94b 100644 --- a/cpp/meson.build +++ b/cpp/meson.build @@ -95,12 +95,13 @@ needs_testing = (get_option('testing').enabled() or needs_integration ) needs_json = get_option('json').enabled() or needs_testing +needs_orc = get_option('orc').enabled() needs_brotli = get_option('brotli').enabled() or needs_fuzzing needs_bz2 = get_option('bz2').enabled() -needs_lz4 = get_option('lz4').enabled() -needs_snappy = get_option('snappy').enabled() -needs_zlib = get_option('zlib').enabled() -needs_zstd = get_option('zstd').enabled() +needs_lz4 = get_option('lz4').enabled() or needs_orc +needs_snappy = get_option('snappy').enabled() or needs_orc +needs_zlib = get_option('zlib').enabled() or needs_orc +needs_zstd = get_option('zstd').enabled() or needs_orc needs_utilities = get_option('utilities').enabled() subdir('src/arrow') diff --git a/cpp/meson.options b/cpp/meson.options index 668f440ee72..baf51c8c5bd 100644 --- a/cpp/meson.options +++ b/cpp/meson.options @@ -84,6 +84,12 @@ option('git_description', type: 'string') option('lz4', type: 'feature', description: 'Build with lz4 compression') +option( + 'orc', + type: 'feature', + description: 'Build the Arrow ORC adapter', +) + option( 'package_kind', type: 'string', diff --git a/cpp/src/arrow/adapters/orc/meson.build b/cpp/src/arrow/adapters/orc/meson.build new file mode 100644 index 00000000000..dacea07108b --- /dev/null +++ b/cpp/src/arrow/adapters/orc/meson.build @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +install_headers(['adapter.h', 'options.h'], subdir: 'arrow/adapters/orc') + +arrow_orc_dep = declare_dependency( + include_directories: include_directories('.'), + dependencies: [arrow_dep], +) + +meson.override_dependency('arrow-orc', arrow_orc_dep) + +pkg.generate( + filebase: 'arrow-orc', + name: 'Apache Arrow ORC', + description: 'ORC modules for Apache Arrow', + requires: ['arrow'], +) + +exc = executable( + 'arrow-orc-adapter-test', + sources: ['adapter_test.cc'], + dependencies: [arrow_test_dep, orc_dep], +) +test('arrow-orc-adapter-test', exc) diff --git a/cpp/src/arrow/adapters/orc/util.cc b/cpp/src/arrow/adapters/orc/util.cc index 6974faae59b..68d062f125f 100644 --- a/cpp/src/arrow/adapters/orc/util.cc +++ b/cpp/src/arrow/adapters/orc/util.cc @@ -212,7 +212,10 @@ Status AppendTimestampBatch(liborc::ColumnVectorBatch* column_vector_batch, const int64_t* seconds = batch->data.data() + offset; const int64_t* nanos = batch->nanoseconds.data() + offset; - auto transform_timestamp = [seconds, nanos](int64_t index) { + auto transform_timestamp = [seconds, nanos, valid_bytes](int64_t index) -> int64_t { + if (valid_bytes && !valid_bytes[index]) { + return 0; + } return seconds[index] * kOneSecondNanos + nanos[index]; }; diff --git a/cpp/src/arrow/meson.build b/cpp/src/arrow/meson.build index a04fdf88c2d..baf9ca1359c 100644 --- a/cpp/src/arrow/meson.build +++ b/cpp/src/arrow/meson.build @@ -472,6 +472,22 @@ if needs_json } endif +if needs_orc + orc_dep = dependency('orc') + arrow_components += { + 'arrow_orc': { + 'sources': files( + 'adapters/orc/adapter.cc', + 'adapters/orc/options.cc', + 'adapters/orc/util.cc', + ), + 'dependencies': [orc_dep], + }, + } +else + orc_dep = disabler() +endif + arrow_srcs = [] include_dir = include_directories('..') arrow_includes = [include_dir] @@ -835,6 +851,10 @@ if needs_json subdir('json') endif +if needs_orc + subdir('adapters/orc') +endif + if needs_ipc subdir('ipc') endif diff --git a/cpp/src/arrow/util/meson.build b/cpp/src/arrow/util/meson.build index 2fbbedbb931..6edcd716896 100644 --- a/cpp/src/arrow/util/meson.build +++ b/cpp/src/arrow/util/meson.build @@ -49,7 +49,7 @@ conf_data.set('ARROW_JEMALLOC', false) conf_data.set('ARROW_JEMALLOC_VENDORED', false) conf_data.set('ARROW_JSON', needs_json) conf_data.set('ARROW_MIMALLOC', false) -conf_data.set('ARROW_ORC', false) +conf_data.set('ARROW_ORC', needs_orc) conf_data.set('ARROW_PARQUET', needs_parquet) conf_data.set('ARROW_SUBSTRAIT', false) conf_data.set('ARROW_AZURE', false) diff --git a/cpp/subprojects/apache-orc.wrap b/cpp/subprojects/apache-orc.wrap new file mode 100644 index 00000000000..b95657cf5bd --- /dev/null +++ b/cpp/subprojects/apache-orc.wrap @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[wrap-file] +directory = orc-2.2.0 +source_url = https://www.apache.org/dyn/closer.lua?action=download&filename=orc/orc-2.2.0/orc-2.2.0.tar.gz +source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/apache-orc_2.2.0-1/orc-2.2.0.tar.gz +source_filename = orc-2.2.0.tar.gz +source_hash = b15aca45a7e73ffbd1bbc36a78cd1422d41f07721092a25f43448e6e16f4763b +wrapdb_version = 2.2.0-1 + +[provide] +orc = orc_dep