From 7cc67414a24859975e22796de9fe7e2aacc747fa Mon Sep 17 00:00:00 2001 From: Michael Armbrust Date: Mon, 24 Aug 2015 16:54:57 -0700 Subject: [PATCH 1/3] [SPARK-10198][SQL] Turn off partition verification by default --- .../scala/org/apache/spark/sql/SQLConf.scala | 2 +- .../spark/sql/hive/QueryPartitionSuite.scala | 63 ++++++++++--------- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index e9de14f025502..2974055a141b1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -312,7 +312,7 @@ private[spark] object SQLConf { doc = "When true, enable filter pushdown for ORC files.") val HIVE_VERIFY_PARTITION_PATH = booleanConf("spark.sql.hive.verifyPartitionPath", - defaultValue = Some(true), + defaultValue = Some(false), doc = "") val HIVE_METASTORE_PARTITION_PRUNING = booleanConf("spark.sql.hive.metastorePartitionPruning", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala index 017bc2adc103b..707da7ef1fcf1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala @@ -18,50 +18,55 @@ package org.apache.spark.sql.hive import com.google.common.io.Files +import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.{QueryTest, _} import org.apache.spark.util.Utils -class QueryPartitionSuite extends QueryTest { +class QueryPartitionSuite extends QueryTest with SQLTestUtils { private lazy val ctx = org.apache.spark.sql.hive.test.TestHive import ctx.implicits._ import ctx.sql + def _sqlContext = ctx + test("SPARK-5068: query data when path doesn't exist"){ - val testData = ctx.sparkContext.parallelize( - (1 to 10).map(i => TestData(i, i.toString))).toDF() - testData.registerTempTable("testData") + withSQLConf((SQLConf.HIVE_VERIFY_PARTITION_PATH.key, "false")) { + val testData = ctx.sparkContext.parallelize( + (1 to 10).map(i => TestData(i, i.toString))).toDF() + testData.registerTempTable("testData") - val tmpDir = Files.createTempDir() - // create the table for test - sql(s"CREATE TABLE table_with_partition(key int,value string) " + - s"PARTITIONED by (ds string) location '${tmpDir.toURI.toString}' ") - sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') " + - "SELECT key,value FROM testData") - sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='2') " + - "SELECT key,value FROM testData") - sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='3') " + - "SELECT key,value FROM testData") - sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='4') " + - "SELECT key,value FROM testData") + val tmpDir = Files.createTempDir() + // create the table for test + sql(s"CREATE TABLE table_with_partition(key int,value string) " + + s"PARTITIONED by (ds string) location '${tmpDir.toURI.toString}' ") + sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') " + + "SELECT key,value FROM testData") + sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='2') " + + "SELECT key,value FROM testData") + sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='3') " + + "SELECT key,value FROM testData") + sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='4') " + + "SELECT key,value FROM testData") - // test for the exist path - checkAnswer(sql("select key,value from table_with_partition"), - testData.toDF.collect ++ testData.toDF.collect - ++ testData.toDF.collect ++ testData.toDF.collect) + // test for the exist path + checkAnswer(sql("select key,value from table_with_partition"), + testData.toDF.collect ++ testData.toDF.collect + ++ testData.toDF.collect ++ testData.toDF.collect) - // delete the path of one partition - tmpDir.listFiles - .find { f => f.isDirectory && f.getName().startsWith("ds=") } - .foreach { f => Utils.deleteRecursively(f) } + // delete the path of one partition + tmpDir.listFiles + .find { f => f.isDirectory && f.getName().startsWith("ds=") } + .foreach { f => Utils.deleteRecursively(f) } - // test for after delete the path - checkAnswer(sql("select key,value from table_with_partition"), - testData.toDF.collect ++ testData.toDF.collect ++ testData.toDF.collect) + // test for after delete the path + checkAnswer(sql("select key,value from table_with_partition"), + testData.toDF.collect ++ testData.toDF.collect ++ testData.toDF.collect) - sql("DROP TABLE table_with_partition") - sql("DROP TABLE createAndInsertTest") + sql("DROP TABLE table_with_partition") + sql("DROP TABLE createAndInsertTest") + } } } From 5826f16ccda3dc90f2b5caa8e04bd47fae4ec908 Mon Sep 17 00:00:00 2001 From: Michael Armbrust Date: Mon, 24 Aug 2015 17:21:42 -0700 Subject: [PATCH 2/3] style --- .../scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala index 707da7ef1fcf1..22530d4816663 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala @@ -28,9 +28,8 @@ class QueryPartitionSuite extends QueryTest with SQLTestUtils { private lazy val ctx = org.apache.spark.sql.hive.test.TestHive import ctx.implicits._ - import ctx.sql - def _sqlContext = ctx + protected def _sqlContext = ctx test("SPARK-5068: query data when path doesn't exist"){ withSQLConf((SQLConf.HIVE_VERIFY_PARTITION_PATH.key, "false")) { From 6a4d6dbc2400294f23f0002b78f4c8d56513c355 Mon Sep 17 00:00:00 2001 From: Michael Armbrust Date: Tue, 25 Aug 2015 09:50:13 -0700 Subject: [PATCH 3/3] Update QueryPartitionSuite.scala --- .../scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala index 22530d4816663..1cc8a93e83411 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala @@ -32,7 +32,7 @@ class QueryPartitionSuite extends QueryTest with SQLTestUtils { protected def _sqlContext = ctx test("SPARK-5068: query data when path doesn't exist"){ - withSQLConf((SQLConf.HIVE_VERIFY_PARTITION_PATH.key, "false")) { + withSQLConf((SQLConf.HIVE_VERIFY_PARTITION_PATH.key, "true")) { val testData = ctx.sparkContext.parallelize( (1 to 10).map(i => TestData(i, i.toString))).toDF() testData.registerTempTable("testData")