From 09ea8f16e7ccbbdbdef0372156c5711daaf65231 Mon Sep 17 00:00:00 2001
From: Kent Yao
Date: Fri, 27 Mar 2020 23:02:04 +0800
Subject: [PATCH 1/3] [WIP][TEST] Eliminate CliSuite flakiness

---
 .../sql/hive/thriftserver/CliSuite.scala | 90 +++++++++++--------
 1 file changed, 53 insertions(+), 37 deletions(-)

diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index c393054051fc4..793ee1fd1a5fa 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -39,7 +39,7 @@ import org.apache.spark.util.{ThreadUtils, Utils}
 /**
  * A test suite for the `spark-sql` CLI tool.
  */
-class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterEach with Logging {
+class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
   val warehousePath = Utils.createTempDir()
   val metastorePath = Utils.createTempDir()
   val scratchDirPath = Utils.createTempDir()
@@ -62,12 +62,6 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
     }
   }
 
-  override def afterEach(): Unit = {
-    // Only running `runCliWithin` in a single test case will share the same temporary
-    // Hive metastore
-    Utils.deleteRecursively(metastorePath)
-  }
-
   /**
    * Run a CLI operation and expect all the queries and expected answers to be returned.
    *
@@ -84,7 +78,8 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
       extraArgs: Seq[String] = Seq.empty,
       errorResponses: Seq[String] = Seq("Error:"),
       maybeWarehouse: Option[File] = Some(warehousePath),
-      useExternalHiveFile: Boolean = false)(
+      useExternalHiveFile: Boolean = false,
+      maybeMetastore: Option[File] = Some(metastorePath))(
       queriesAndExpectedAnswers: (String, String)*): Unit = {
 
     val (queries, expectedAnswers) = queriesAndExpectedAnswers.unzip
@@ -98,9 +93,11 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
     }
     val warehouseConf =
       maybeWarehouse.map(dir => s"--hiveconf ${ConfVars.METASTOREWAREHOUSE}=$dir").getOrElse("")
+    // whether to use a separate derby metastore
+    val metastore = maybeMetastore.getOrElse(metastorePath)
     val command = {
       val cliScript = "../../bin/spark-sql".split("/").mkString(File.separator)
-      val jdbcUrl = s"jdbc:derby:;databaseName=$metastorePath;create=true"
+      val jdbcUrl = s"jdbc:derby:;databaseName=$metastore;create=true"
       s"""$cliScript
          |  --master local
          |  --driver-java-options -Dderby.system.durability=test
@@ -177,9 +174,18 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
   }
 
   test("load warehouse dir from hive-site.xml") {
-    runCliWithin(1.minute, maybeWarehouse = None, useExternalHiveFile = true)(
-      "desc database default;" -> "hive_one",
-      "set spark.sql.warehouse.dir;" -> "hive_one")
+    val metastore = Utils.createTempDir()
+    metastore.delete()
+    try {
+      runCliWithin(1.minute,
+        maybeWarehouse = None,
+        useExternalHiveFile = true,
+        maybeMetastore = Some(metastore))(
+        "desc database default;" -> "hive_one",
+        "set spark.sql.warehouse.dir;" -> "hive_one")
+    } finally {
+      Utils.deleteRecursively(metastore)
+    }
   }
 
   test("load warehouse dir from --hiveconf") {
@@ -193,35 +199,45 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
 
   test("load warehouse dir from --conf spark(.hadoop).hive.*") {
     // override conf from hive-site.xml
-    runCliWithin(
-      2.minute,
-      extraArgs = Seq("--conf", s"spark.hadoop.${ConfVars.METASTOREWAREHOUSE}=$sparkWareHouseDir"),
-      maybeWarehouse = None,
-      useExternalHiveFile = true)(
-      "desc database default;" -> sparkWareHouseDir.getAbsolutePath,
-      "create database cliTestDb;" -> "",
-      "desc database cliTestDb;" -> sparkWareHouseDir.getAbsolutePath,
-      "set spark.sql.warehouse.dir;" -> sparkWareHouseDir.getAbsolutePath)
-
-    // override conf from --hiveconf too
-    runCliWithin(
-      2.minute,
-      extraArgs = Seq("--conf", s"spark.${ConfVars.METASTOREWAREHOUSE}=$sparkWareHouseDir"))(
-      "desc database default;" -> sparkWareHouseDir.getAbsolutePath,
-      "create database cliTestDb;" -> "",
-      "desc database cliTestDb;" -> sparkWareHouseDir.getAbsolutePath,
-      "set spark.sql.warehouse.dir;" -> sparkWareHouseDir.getAbsolutePath)
+    val metastore = Utils.createTempDir()
+    metastore.delete()
+    try {
+      runCliWithin(2.minute,
+        extraArgs =
+          Seq("--conf", s"spark.hadoop.${ConfVars.METASTOREWAREHOUSE}=$sparkWareHouseDir"),
+        maybeWarehouse = None,
+        useExternalHiveFile = true,
+        maybeMetastore = Some(metastore))(
+        "desc database default;" -> sparkWareHouseDir.getAbsolutePath,
+        "create database cliTestDb;" -> "",
+        "desc database cliTestDb;" -> sparkWareHouseDir.getAbsolutePath,
+        "set spark.sql.warehouse.dir;" -> sparkWareHouseDir.getAbsolutePath)
+
+      // override conf from --hiveconf too
+      runCliWithin(2.minute,
+        extraArgs = Seq("--conf", s"spark.${ConfVars.METASTOREWAREHOUSE}=$sparkWareHouseDir"),
+        maybeMetastore = Some(metastore))(
+        "desc database default;" -> sparkWareHouseDir.getAbsolutePath,
+        "create database cliTestDb;" -> "",
+        "desc database cliTestDb;" -> sparkWareHouseDir.getAbsolutePath,
+        "set spark.sql.warehouse.dir;" -> sparkWareHouseDir.getAbsolutePath)
+    } finally {
+      Utils.deleteRecursively(metastore)
+    }
   }
 
   test("load warehouse dir from spark.sql.warehouse.dir") {
     // spark.sql.warehouse.dir overrides all hive ones
-    runCliWithin(
-      2.minute,
-      extraArgs =
-        Seq("--conf",
-          s"${StaticSQLConf.WAREHOUSE_PATH.key}=${sparkWareHouseDir}1",
-          "--conf", s"spark.hadoop.${ConfVars.METASTOREWAREHOUSE}=${sparkWareHouseDir}2"))(
-      "desc database default;" -> sparkWareHouseDir.getAbsolutePath.concat("1"))
+    val metastore = Utils.createTempDir()
+    metastore.delete()
+    try {
+      runCliWithin(2.minute,
+        extraArgs = Seq(
+          "--conf", s"${StaticSQLConf.WAREHOUSE_PATH.key}=${sparkWareHouseDir}1",
+          "--conf", s"spark.hadoop.${ConfVars.METASTOREWAREHOUSE}=${sparkWareHouseDir}2"),
+        maybeMetastore = Some(metastore))(
+        "desc database default;" -> sparkWareHouseDir.getAbsolutePath.concat("1"))
+    }
   }
 
   test("Simple commands") {

From 1ef7ffd5b3051beaa6d3650f1a0278c2afe7f1f6 Mon Sep 17 00:00:00 2001
From: Kent Yao
Date: Thu, 30 Apr 2020 01:20:21 +0800
Subject: [PATCH 2/3] address comments and add comments

---
 .../spark/sql/hive/thriftserver/CliSuite.scala | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 1987d1bd33495..08851611010a6 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -71,6 +71,12 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
    * is taken as an immediate error condition. That is: if a line containing
   * one of these strings is found, fail the test immediately.
   * The default value is `Seq("Error:")`
+   * @param maybeWarehouse an optional warehouse path, which will be set via
+   *                       `hive.metastore.warehouse.dir`.
+   * @param useExternalHiveFile whether to load the hive-site.xml from `src/test/noclasspath` or
+   *                            not, disabled by default
+   * @param metastore the path where the embedded derby metastore database is located. Uses the
+   *                  global `metastorePath` by default
   * @param queriesAndExpectedAnswers one or more tuples of query + answer
   */
  def runCliWithin(
@@ -79,7 +85,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
       errorResponses: Seq[String] = Seq("Error:"),
       maybeWarehouse: Option[File] = Some(warehousePath),
       useExternalHiveFile: Boolean = false,
-      maybeMetastore: Option[File] = Some(metastorePath))(
+      metastore: File = metastorePath)(
       queriesAndExpectedAnswers: (String, String)*): Unit = {
 
     // Explicitly adds ENTER for each statement to make sure they are actually entered into the CLI.
@@ -110,7 +116,6 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
     val warehouseConf =
       maybeWarehouse.map(dir => s"--hiveconf ${ConfVars.METASTOREWAREHOUSE}=$dir").getOrElse("")
     // whether to use a separate derby metastore
-    val metastore = maybeMetastore.getOrElse(metastorePath)
     val command = {
       val cliScript = "../../bin/spark-sql".split("/").mkString(File.separator)
       val jdbcUrl = s"jdbc:derby:;databaseName=$metastore;create=true"
@@ -205,7 +210,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
       runCliWithin(1.minute,
         maybeWarehouse = None,
         useExternalHiveFile = true,
-        maybeMetastore = Some(metastore))(
+        metastore = metastore)(
         "desc database default;" -> "hive_one",
         "set spark.sql.warehouse.dir;" -> "hive_one")
     } finally {
@@ -232,7 +237,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
         Seq("--conf", s"spark.hadoop.${ConfVars.METASTOREWAREHOUSE}=$sparkWareHouseDir"),
         maybeWarehouse = None,
         useExternalHiveFile = true,
-        maybeMetastore = Some(metastore))(
+        metastore = metastore)(
         "desc database default;" -> sparkWareHouseDir.getAbsolutePath,
         "create database cliTestDb;" -> "",
         "desc database cliTestDb;" -> sparkWareHouseDir.getAbsolutePath,
@@ -241,7 +246,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
       // override conf from --hiveconf too
       runCliWithin(2.minute,
         extraArgs = Seq("--conf", s"spark.${ConfVars.METASTOREWAREHOUSE}=$sparkWareHouseDir"),
-        maybeMetastore = Some(metastore))(
+        metastore = metastore)(
         "desc database default;" -> sparkWareHouseDir.getAbsolutePath,
         "create database cliTestDb;" -> "",
         "desc database cliTestDb;" -> sparkWareHouseDir.getAbsolutePath,
@@ -260,7 +265,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
         extraArgs = Seq(
           "--conf", s"${StaticSQLConf.WAREHOUSE_PATH.key}=${sparkWareHouseDir}1",
           "--conf", s"spark.hadoop.${ConfVars.METASTOREWAREHOUSE}=${sparkWareHouseDir}2"),
-        maybeMetastore = Some(metastore))(
+        metastore = metastore)(
         "desc database default;" -> sparkWareHouseDir.getAbsolutePath.concat("1"))
     }
   }

From cfb95c0f2ed8532edf0339556468e92f70567089 Mon Sep 17 00:00:00 2001
From: Kent Yao
Date: Sat, 16 May 2020 13:52:14 +0800
Subject: [PATCH 3/3] address comments

---
 .../org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 08851611010a6..3b6dad69a78a1 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -115,7 +115,6 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
     }
     val warehouseConf =
       maybeWarehouse.map(dir => s"--hiveconf ${ConfVars.METASTOREWAREHOUSE}=$dir").getOrElse("")
-    // whether to use a separate derby metastore
     val command = {
       val cliScript = "../../bin/spark-sql".split("/").mkString(File.separator)
       val jdbcUrl = s"jdbc:derby:;databaseName=$metastore;create=true"
@@ -267,6 +266,8 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
           "--conf", s"spark.hadoop.${ConfVars.METASTOREWAREHOUSE}=${sparkWareHouseDir}2"),
         metastore = metastore)(
         "desc database default;" -> sparkWareHouseDir.getAbsolutePath.concat("1"))
+    } finally {
+      Utils.deleteRecursively(metastore)
     }
   }
 
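Taken together, the three patches replace the shared, suite-wide Derby metastore (previously wiped
in `afterEach`) with an opt-in, per-test metastore. The sketch below is a minimal illustration and
not part of the patches: it assumes the final state of CliSuite.scala after PATCH 3/3, where
`runCliWithin` and Spark's `Utils` are in scope, and the test name and queries are hypothetical,
chosen only to show the pattern the reworked tests follow.

  test("example: isolate this test's derby metastore") {
    // Reserve a unique path, then delete the directory: Derby refuses to
    // create a database at a path that already exists, so only the name is
    // borrowed from the temp dir and the directory itself is left to Derby.
    val metastore = Utils.createTempDir()
    metastore.delete()
    try {
      // Route this CLI run to the isolated metastore instead of the shared
      // `metastorePath`, so repeated or concurrent runs cannot collide.
      runCliWithin(1.minute, metastore = metastore)(
        "create table t(i int) using parquet;" -> "",
        "show tables;" -> "t")
    } finally {
      // Drop the per-test metastore so state cannot leak into later tests.
      Utils.deleteRecursively(metastore)
    }
  }

The `createTempDir()` / `delete()` pair mirrors the rewritten tests above: `Utils.createTempDir()`
only supplies a collision-free path, while the database directory is created by Derby through the
`jdbc:derby:;databaseName=$metastore;create=true` URL that `runCliWithin` builds, and the
`try`/`finally` guarantees cleanup even when a query assertion fails.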