From 3c06e89bee2ca6542841fd7e8410e39898150d55 Mon Sep 17 00:00:00 2001 From: sandeep katta Date: Wed, 21 Aug 2019 20:09:24 +0530 Subject: [PATCH 1/5] Fix the class loader issue --- .../hive/thriftserver/SparkSQLCLIDriver.scala | 5 ++++ .../sql/hive/thriftserver/CliSuite.scala | 26 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index b9614d49eadbd..2451304cb3120 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -111,6 +111,11 @@ private[hive] object SparkSQLCLIDriver extends Logging { // Set all properties specified via command line. val conf: HiveConf = sessionState.getConf + // Hive 2.0.0 onwards HiveConf.getClassLoader returns the UDFClassLoader (created by Hive). 
+ // Because of this, Spark cannot find the jars as the class loader got changed + // Hive changed the class loader because of HIVE-11878, so it is required to use the old + // classLoader as Spark loaded all the jars in this classLoader + conf.setClassLoader(Thread.currentThread().getContextClassLoader) sessionState.cmdProperties.entrySet().asScala.foreach { item => val key = item.getKey.toString val value = item.getValue.toString diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index a979fe6b89679..2c82ed62b2a6e 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -305,4 +305,30 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "SELECT example_format('%o', 93);" -> "135" ) } + + test("SPARK-28840 test --jars command") { + val jarFile = new File("../../sql/hive/src/test/resources/SPARK-21101-1.0.jar").getCanonicalPath + runCliWithin( + 1.minute, + Seq("--jars", s"$jarFile"))( + "CREATE TEMPORARY FUNCTION testjar AS" + + " 'org.apache.spark.sql.hive.execution.UDTFStack';" -> "", + "SELECT testjar(1,'A', 10);" -> "A\t10" + ) + } + + test("SPARK-28840 test --jars and hive.aux.jars.path command") { + val jarFile = new File("../../sql/hive/src/test/resources/SPARK-21101-1.0.jar").getCanonicalPath + val hiveContribJar = HiveTestUtils.getHiveContribJar.getCanonicalPath + runCliWithin( + 1.minute, + Seq("--jars", s"$jarFile", "--conf", + s"spark.hadoop.${ConfVars.HIVEAUXJARS}=$hiveContribJar"))( + "CREATE TEMPORARY FUNCTION testjar AS" + + " 'org.apache.spark.sql.hive.execution.UDTFStack';" -> "", + "SELECT testjar(1,'A', 10);" -> "A\t10", + s"CREATE TEMPORARY FUNCTION example_max AS '${classOf[UDAFExampleMax].getName}';" -> "", + "SELECT example_max(1);" -> 
"1" + ) + } } From be6d653a036bc5dfc188f0e5c58126fa4f2b1de7 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 4 Sep 2019 14:08:55 +0800 Subject: [PATCH 2/5] spark-28954 Use SessionResourceLoader to cover HIVEAUXJARS conf jars --- .../hive/thriftserver/SparkSQLCLIDriver.scala | 24 ++++++++----------- .../thriftserver/ThriftserverShimUtils.scala | 7 ------ .../thriftserver/ThriftserverShimUtils.scala | 10 -------- 3 files changed, 10 insertions(+), 31 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 2451304cb3120..9f554b200f775 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -138,20 +138,7 @@ private[hive] object SparkSQLCLIDriver extends Logging { // Clean up after we exit ShutdownHookManager.addShutdownHook { () => SparkSQLEnv.stop() } - val remoteMode = isRemoteMode(sessionState) - // "-h" option has been passed, so connect to Hive thrift server. - if (!remoteMode) { - // Hadoop-20 and above - we need to augment classpath using hiveconf - // components. 
- // See also: code in ExecDriver.java - var loader = conf.getClassLoader - val auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS) - if (StringUtils.isNotBlank(auxJars)) { - loader = ThriftserverShimUtils.addToClassPath(loader, StringUtils.split(auxJars, ",")) - } - conf.setClassLoader(loader) - Thread.currentThread().setContextClassLoader(loader) - } else { + if (isRemoteMode(sessionState)) { // Hive 1.2 + not supported in CLI throw new RuntimeException("Remote operations not supported") } @@ -169,6 +156,15 @@ private[hive] object SparkSQLCLIDriver extends Logging { val cli = new SparkSQLCLIDriver cli.setHiveVariables(oproc.getHiveVariables) + // In SparkSQL CLI, we may want to use jars augmented by hiveconf + // hive.aux.jars.path, here we add jars augmented by hiveconf to + // Spark's SessionResourceLoader to obtain these jars. + val auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS) + if (StringUtils.isNotBlank(auxJars)) { + val resourceLoader = SparkSQLEnv.sqlContext.sessionState.resourceLoader + StringUtils.split(auxJars, ",").foreach(resourceLoader.addJar(_)) + } + // TODO work around for set the log output to console, because the HiveContext // will set the output into an invalid buffer. 
sessionState.in = System.in diff --git a/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala index 837861a77bf54..fbfc698ecb4bf 100644 --- a/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala +++ b/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.hive.thriftserver import org.apache.commons.logging.LogFactory -import org.apache.hadoop.hive.ql.exec.Utilities import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema, Type} import org.apache.hive.service.cli.Type._ @@ -60,12 +59,6 @@ private[thriftserver] object ThriftserverShimUtils { ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) } - private[thriftserver] def addToClassPath( - loader: ClassLoader, - auxJars: Array[String]): ClassLoader = { - Utilities.addToClassPath(loader, auxJars) - } - private[thriftserver] val testedProtocolVersions = Seq( HIVE_CLI_SERVICE_PROTOCOL_V1, HIVE_CLI_SERVICE_PROTOCOL_V2, diff --git a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala index cb32ebed0ac11..055f6a976885a 100644 --- a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala +++ b/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala @@ -19,9 +19,6 @@ package org.apache.spark.sql.hive.thriftserver import java.security.AccessController -import scala.collection.JavaConverters._ - -import org.apache.hadoop.hive.ql.exec.AddToClassPathAction import 
org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde2.thrift.Type import org.apache.hadoop.hive.serde2.thrift.Type._ @@ -65,13 +62,6 @@ private[thriftserver] object ThriftserverShimUtils { ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) } - private[thriftserver] def addToClassPath( - loader: ClassLoader, - auxJars: Array[String]): ClassLoader = { - val addAction = new AddToClassPathAction(loader, auxJars.toList.asJava) - AccessController.doPrivileged(addAction) - } - private[thriftserver] val testedProtocolVersions = Seq( HIVE_CLI_SERVICE_PROTOCOL_V1, HIVE_CLI_SERVICE_PROTOCOL_V2, From 30ff7a295c73614e623fb87dd50326c02bea1ccf Mon Sep 17 00:00:00 2001 From: sandeep katta Date: Fri, 6 Sep 2019 16:27:38 +0530 Subject: [PATCH 3/5] fix review comments --- .../org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 6 +++--- .../spark/sql/hive/thriftserver/ThriftserverShimUtils.scala | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 2c82ed62b2a6e..6d5e898f846fe 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -313,7 +313,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { Seq("--jars", s"$jarFile"))( "CREATE TEMPORARY FUNCTION testjar AS" + " 'org.apache.spark.sql.hive.execution.UDTFStack';" -> "", - "SELECT testjar(1,'A', 10);" -> "A\t10" + "SELECT testjar(1,'TEST-SPARK-TEST-jar', 28840);" -> "TEST-SPARK-TEST-jar\t28840" ) } @@ -326,9 +326,9 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { s"spark.hadoop.${ConfVars.HIVEAUXJARS}=$hiveContribJar"))( "CREATE TEMPORARY FUNCTION testjar AS" + " 'org.apache.spark.sql.hive.execution.UDTFStack';" -> 
"", - "SELECT testjar(1,'A', 10);" -> "A\t10", + "SELECT testjar(1,'TEST-SPARK-TEST-jar', 28840);" -> "TEST-SPARK-TEST-jar\t28840", s"CREATE TEMPORARY FUNCTION example_max AS '${classOf[UDAFExampleMax].getName}';" -> "", - "SELECT example_max(1);" -> "1" + "SELECT concat_ws(',', 'First', example_max(1234321), 'Third');" -> "First,1234321,Third" ) } } diff --git a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala index 055f6a976885a..850382fe2bfd7 100644 --- a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala +++ b/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.hive.thriftserver -import java.security.AccessController - import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde2.thrift.Type import org.apache.hadoop.hive.serde2.thrift.Type._ From 47f8632f1e9959611d5a7ffca4453cedabc621ee Mon Sep 17 00:00:00 2001 From: sandeep katta Date: Tue, 10 Sep 2019 22:23:23 +0530 Subject: [PATCH 4/5] avoild using ClassOf[HiveCode] --- .../scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 6d5e898f846fe..069fd78670006 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -327,7 +327,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "CREATE TEMPORARY FUNCTION testjar AS" + " 
'org.apache.spark.sql.hive.execution.UDTFStack';" -> "", "SELECT testjar(1,'TEST-SPARK-TEST-jar', 28840);" -> "TEST-SPARK-TEST-jar\t28840", - s"CREATE TEMPORARY FUNCTION example_max AS '${classOf[UDAFExampleMax].getName}';" -> "", + "CREATE TEMPORARY FUNCTION example_max AS 'org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax';" -> "", "SELECT concat_ws(',', 'First', example_max(1234321), 'Third');" -> "First,1234321,Third" ) } From 4a79580407396c071ca0e6bc98fc2ff7851ea72f Mon Sep 17 00:00:00 2001 From: sandeep katta Date: Wed, 11 Sep 2019 08:34:37 +0530 Subject: [PATCH 5/5] avoid using ClassOf[HiveCode] --- .../org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 069fd78670006..6d45041e12821 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -327,7 +327,8 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "CREATE TEMPORARY FUNCTION testjar AS" + " 'org.apache.spark.sql.hive.execution.UDTFStack';" -> "", "SELECT testjar(1,'TEST-SPARK-TEST-jar', 28840);" -> "TEST-SPARK-TEST-jar\t28840", - "CREATE TEMPORARY FUNCTION example_max AS 'org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax';" -> "", + "CREATE TEMPORARY FUNCTION example_max AS " + + "'org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax';" -> "", "SELECT concat_ws(',', 'First', example_max(1234321), 'Third');" -> "First,1234321,Third" ) }