From 678cf5a3c15f4f41a53593ef3489152a3d55f46b Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Mon, 10 Feb 2020 14:48:37 -0800 Subject: [PATCH 1/6] [SPARK-30783] Exclude hive-service-rpc --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 1 - dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 1 - pom.xml | 20 ++++++++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 42bdf112efccb..c50cf96dc9065 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -87,7 +87,6 @@ hive-jdbc/2.3.6//hive-jdbc-2.3.6.jar hive-llap-common/2.3.6//hive-llap-common-2.3.6.jar hive-metastore/2.3.6//hive-metastore-2.3.6.jar hive-serde/2.3.6//hive-serde-2.3.6.jar -hive-service-rpc/2.3.6//hive-service-rpc-2.3.6.jar hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 6006fa4b43f42..c37ce7fab36f6 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -86,7 +86,6 @@ hive-jdbc/2.3.6//hive-jdbc-2.3.6.jar hive-llap-common/2.3.6//hive-llap-common-2.3.6.jar hive-metastore/2.3.6//hive-metastore-2.3.6.jar hive-serde/2.3.6//hive-serde-2.3.6.jar -hive-service-rpc/2.3.6//hive-service-rpc-2.3.6.jar hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar diff --git a/pom.xml b/pom.xml index a8d6ac932bac2..925fa28a291a4 100644 --- a/pom.xml +++ b/pom.xml @@ -1452,6 +1452,11 @@ ${hive.group} hive-service + + + ${hive.group} + hive-service-rpc + ${hive.group} hive-shims @@ -1508,6 +1513,11 @@ ${hive.group} hive-service + + + ${hive.group} + hive-service-rpc + ${hive.group} hive-shims @@ -1761,6 +1771,11 @@ ${hive.group} hive-service + + + ${hive.group} + hive-service-rpc + ${hive.group} hive-shims @@ -1911,6 +1926,11 @@ groovy-all + + + ${hive.group} + hive-service-rpc + org.apache.parquet From c39f5db37138a972e9e88b5170366329d85032bd Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Mon, 10 Feb 2020 19:38:52 -0800 Subject: [PATCH 2/6] Change maven to use orc-nohive --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 4 ++-- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 4 ++-- pom.xml | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index c50cf96dc9065..52dfe8db1fd82 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -175,8 +175,8 @@ objenesis/2.5.1//objenesis-2.5.1.jar okhttp/3.12.6//okhttp-3.12.6.jar okio/1.15.0//okio-1.15.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.5.9//orc-core-1.5.9.jar -orc-mapreduce/1.5.9//orc-mapreduce-1.5.9.jar +orc-core/1.5.9/nohive/orc-core-1.5.9-nohive.jar +orc-mapreduce/1.5.9/nohive/orc-mapreduce-1.5.9-nohive.jar orc-shims/1.5.9//orc-shims-1.5.9.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index c37ce7fab36f6..1d3e1ed2da779 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -190,8 +190,8 @@ okhttp/2.7.5//okhttp-2.7.5.jar okhttp/3.12.6//okhttp-3.12.6.jar okio/1.15.0//okio-1.15.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.5.9//orc-core-1.5.9.jar -orc-mapreduce/1.5.9//orc-mapreduce-1.5.9.jar +orc-core/1.5.9/nohive/orc-core-1.5.9-nohive.jar +orc-mapreduce/1.5.9/nohive/orc-mapreduce-1.5.9-nohive.jar orc-shims/1.5.9//orc-shims-1.5.9.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar diff --git a/pom.xml b/pom.xml index 925fa28a291a4..6c1c42e66d5e7 100644 --- a/pom.xml +++ b/pom.xml @@ -136,7 +136,7 @@ 10.12.1.1 1.10.1 1.5.9 - + nohive com.twitter 1.6.0 9.4.18.v20190429 @@ -3037,7 +3037,6 @@ provided provided provided - nohive 3.2.10 From 35d9a3f5148d969dd0d7d1ecbf0e75f52e90f2d9 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Mon, 10 Feb 2020 19:56:23 -0800 Subject: [PATCH 3/6] compile --- .../sql/execution/datasources/orc/OrcColumnVector.java | 2 +- .../sql/execution/datasources/orc/OrcFilters.scala | 10 +++++----- .../sql/execution/datasources/orc/OrcShimUtils.scala | 10 +++++----- .../sql/execution/datasources/orc/OrcFilterSuite.scala | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sql/core/v2.3/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java b/sql/core/v2.3/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java index 2f1925e69a337..9bfad1e83ee7b 100644 --- a/sql/core/v2.3/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java +++ b/sql/core/v2.3/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java @@ -19,7 +19,7 @@ import java.math.BigDecimal; -import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.orc.storage.ql.exec.vector.*; import org.apache.spark.sql.types.DataType; import org.apache.spark.sql.types.Decimal; diff --git a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala index 948ab44a8c19c..66a59759c6582 100644 --- a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala +++ b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala @@ -17,11 +17,11 @@ package org.apache.spark.sql.execution.datasources.orc -import org.apache.hadoop.hive.common.`type`.HiveDecimal -import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument} -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.Builder -import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable +import org.apache.orc.storage.common.`type`.HiveDecimal +import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgument} +import org.apache.orc.storage.ql.io.sarg.SearchArgument.Builder +import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder +import org.apache.orc.storage.serde2.io.HiveDecimalWritable import org.apache.spark.SparkException import org.apache.spark.sql.sources.Filter diff --git a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala index c32f024476e69..68503aba22b40 100644 --- a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala +++ b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala @@ -19,11 +19,11 @@ package org.apache.spark.sql.execution.datasources.orc import java.sql.Date -import org.apache.hadoop.hive.common.`type`.HiveDecimal -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch -import org.apache.hadoop.hive.ql.io.sarg.{SearchArgument => OrcSearchArgument} -import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.{Operator => OrcOperator} -import org.apache.hadoop.hive.serde2.io.{DateWritable, HiveDecimalWritable} +import org.apache.orc.storage.common.`type`.HiveDecimal +import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch +import org.apache.orc.storage.ql.io.sarg.{SearchArgument => OrcSearchArgument} +import org.apache.orc.storage.ql.io.sarg.PredicateLeaf.{Operator => OrcOperator} +import org.apache.orc.storage.serde2.io.{DateWritable, HiveDecimalWritable} import org.apache.spark.sql.catalyst.expressions.SpecializedGetters import org.apache.spark.sql.types.Decimal diff --git a/sql/core/v2.3/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/v2.3/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala index 1baa69e82bb18..82ffcd22e66db 100644 --- a/sql/core/v2.3/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala +++ b/sql/core/v2.3/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala @@ -23,7 +23,7 @@ import java.sql.{Date, Timestamp} import scala.collection.JavaConverters._ -import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument} +import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgument} import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, Column, DataFrame} From 26e8bcffecfeab82cdca55549195dd0a5b685b18 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Mon, 10 Feb 2020 21:06:50 -0800 Subject: [PATCH 4/6] Revert "[SPARK-30783] Exclude hive-service-rpc" This reverts commit 678cf5a3c15f4f41a53593ef3489152a3d55f46b. --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 1 + dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 1 + pom.xml | 20 -------------------- 3 files changed, 2 insertions(+), 20 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 52dfe8db1fd82..49f100b043680 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -87,6 +87,7 @@ hive-jdbc/2.3.6//hive-jdbc-2.3.6.jar hive-llap-common/2.3.6//hive-llap-common-2.3.6.jar hive-metastore/2.3.6//hive-metastore-2.3.6.jar hive-serde/2.3.6//hive-serde-2.3.6.jar +hive-service-rpc/2.3.6//hive-service-rpc-2.3.6.jar hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 1d3e1ed2da779..b11c5e058b586 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -86,6 +86,7 @@ hive-jdbc/2.3.6//hive-jdbc-2.3.6.jar hive-llap-common/2.3.6//hive-llap-common-2.3.6.jar hive-metastore/2.3.6//hive-metastore-2.3.6.jar hive-serde/2.3.6//hive-serde-2.3.6.jar +hive-service-rpc/2.3.6//hive-service-rpc-2.3.6.jar hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar diff --git a/pom.xml b/pom.xml index 6c1c42e66d5e7..ba62aafe0da89 100644 --- a/pom.xml +++ b/pom.xml @@ -1452,11 +1452,6 @@ ${hive.group} hive-service - - - ${hive.group} - hive-service-rpc - ${hive.group} hive-shims @@ -1513,11 +1508,6 @@ ${hive.group} hive-service - - - ${hive.group} - hive-service-rpc - ${hive.group} hive-shims @@ -1771,11 +1761,6 @@ ${hive.group} hive-service - - - ${hive.group} - hive-service-rpc - ${hive.group} hive-shims @@ -1926,11 +1911,6 @@ groovy-all - - - ${hive.group} - hive-service-rpc - org.apache.parquet From 9e8791e2863acb4707f247c9beab12815e2f6aea Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Tue, 11 Feb 2020 15:13:19 -0800 Subject: [PATCH 5/6] do not pull in hive-storage-api --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 1 - dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 1 - sql/core/pom.xml | 4 ---- 3 files changed, 6 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 49f100b043680..1e48975c00808 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -92,7 +92,6 @@ hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar hive-shims/2.3.6//hive-shims-2.3.6.jar -hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar hive-vector-code-gen/2.3.6//hive-vector-code-gen-2.3.6.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index b11c5e058b586..bcd82e4312b17 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -91,7 +91,6 @@ hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar hive-shims/2.3.6//hive-shims-2.3.6.jar -hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar hive-vector-code-gen/2.3.6//hive-vector-code-gen-2.3.6.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 0e664eca6a820..f3494892b5bab 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -95,10 +95,6 @@ orc-mapreduce ${orc.classifier} - - org.apache.hive - hive-storage-api - org.apache.parquet parquet-column From a5039ab4541bc337d263665093aee77c2fd6c035 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Tue, 11 Feb 2020 21:48:51 -0800 Subject: [PATCH 6/6] Revert "do not pull in hive-storage-api" This reverts commit 9e8791e2863acb4707f247c9beab12815e2f6aea. --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 1 + dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 1 + sql/core/pom.xml | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 1e48975c00808..49f100b043680 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -92,6 +92,7 @@ hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar hive-shims/2.3.6//hive-shims-2.3.6.jar +hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar hive-vector-code-gen/2.3.6//hive-vector-code-gen-2.3.6.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index bcd82e4312b17..b11c5e058b586 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -91,6 +91,7 @@ hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar hive-shims/2.3.6//hive-shims-2.3.6.jar +hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar hive-vector-code-gen/2.3.6//hive-vector-code-gen-2.3.6.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar diff --git a/sql/core/pom.xml b/sql/core/pom.xml index f3494892b5bab..0e664eca6a820 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -95,6 +95,10 @@ orc-mapreduce ${orc.classifier} + + org.apache.hive + hive-storage-api + org.apache.parquet parquet-column