From eee85e2a2095bc7f8bd162b3577b9d1eeeb3dcee Mon Sep 17 00:00:00 2001
From: ulysses
Date: Thu, 13 Aug 2020 12:57:13 +0800
Subject: [PATCH 1/8] init

---
 .../sql/hive/client/HiveClientImpl.scala      | 27 +++++++++++++++++--
 .../sql/hive/execution/HiveSQLViewSuite.scala | 22 ++++++++++++++-
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 8aa60365f3c1d..882530e3ef324 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -986,7 +986,8 @@ private[hive] object HiveClientImpl extends Logging {
     val typeString = if (c.metadata.contains(HIVE_TYPE_STRING)) {
       c.metadata.getString(HIVE_TYPE_STRING)
     } else {
-      c.dataType.catalogString
+      // Replace NullType with HiveVoidType, since Hive parses void, not null.
+      HiveVoidType.replaceVoidType(c.dataType).catalogString
     }
     new FieldSchema(c.name, typeString, c.getComment().orNull)
   }
@@ -1005,7 +1006,8 @@ private[hive] object HiveClientImpl extends Logging {
   /** Builds the native StructField from Hive's FieldSchema. */
   def fromHiveColumn(hc: FieldSchema): StructField = {
     val columnType = getSparkSQLDataType(hc)
-    val metadata = if (hc.getType != columnType.catalogString) {
+    val metadata = if (hc.getType != columnType.catalogString &&
+      hc.getType != HiveVoidType.catalogString) {
       new MetadataBuilder().putString(HIVE_TYPE_STRING, hc.getType).build()
     } else {
       Metadata.empty
@@ -1273,3 +1275,24 @@ private[hive] object HiveClientImpl extends Logging {
     hiveConf
   }
 }
+
+class HiveVoidType extends DataType {
+  override def defaultSize: Int = 1
+  override def asNullable: HiveVoidType = this
+  override def simpleString: String = "void"
+}
+
+case object HiveVoidType extends HiveVoidType {
+  def replaceVoidType(dt: DataType): DataType = dt match {
+    case ArrayType(et, nullable) =>
+      ArrayType(replaceVoidType(et), nullable)
+    case MapType(kt, vt, nullable) =>
+      MapType(replaceVoidType(kt), replaceVoidType(vt), nullable)
+    case StructType(fields) =>
+      StructType(fields.map { field =>
+        field.copy(dataType = replaceVoidType(field.dataType))
+      })
+    case _: NullType => HiveVoidType
+    case _ => dt
+  }
+}
\ No newline at end of file
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
index fa43ff14fd796..cd22433540125 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
@@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
 import org.apache.spark.sql.execution.SQLViewSuite
 import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton}
-import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.types.{NullType, StructType}
 /**
  * A test suite for Hive view related functionality.
@@ -137,4 +137,24 @@ class HiveSQLViewSuite extends SQLViewSuite with TestHiveSingleton {
       }
     }
   }
+
+  test("Add HiveVoidType to compatible with Hive void type") {
+    withView("v1") {
+      sql("create view v1 as select null as c")
+      val df = sql("select * from v1")
+      assert(df.schema.fields.head.dataType == NullType)
+      checkAnswer(
+        df,
+        Row(null)
+      )
+
+      sql("alter view v1 as select null as c1, 1 as c2")
+      val df2 = sql("select * from v1")
+      assert(df2.schema.fields.head.dataType == NullType)
+      checkAnswer(
+        df2,
+        Row(null, 1)
+      )
+    }
+  }
 }

From de51cbc9fc50315042a5c2825fbba3728aa26c31 Mon Sep 17 00:00:00 2001
From: ulysses
Date: Thu, 13 Aug 2020 12:59:07 +0800
Subject: [PATCH 2/8] fix type

---
 .../scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 882530e3ef324..1c387dd745d76 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -1007,7 +1007,7 @@
   def fromHiveColumn(hc: FieldSchema): StructField = {
     val columnType = getSparkSQLDataType(hc)
     val metadata = if (hc.getType != columnType.catalogString &&
-      hc.getType != HiveVoidType.catalogString) {
+        hc.getType != HiveVoidType.catalogString) {
       new MetadataBuilder().putString(HIVE_TYPE_STRING, hc.getType).build()
     } else {
       Metadata.empty

From ad890a4bfeb25732dd162dee3009d1e802fcbec8 Mon Sep 17 00:00:00 2001
From: ulysses
Date: Thu, 13 Aug 2020 13:08:07 +0800
Subject: [PATCH 3/8] fix typo

---
 .../scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 1c387dd745d76..c746de1b006a5 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -1295,4 +1295,4 @@ case object HiveVoidType extends HiveVoidType {
     case _: NullType => HiveVoidType
     case _ => dt
   }
-}
\ No newline at end of file
+}

From d20a82fbf43e2a3c5f114c5c7906d8872abf19dc Mon Sep 17 00:00:00 2001
From: ulysses
Date: Thu, 13 Aug 2020 19:27:22 +0800
Subject: [PATCH 4/8] fix

---
 .../org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
index cd22433540125..b78d891a6ae19 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
@@ -138,8 +138,9 @@ class HiveSQLViewSuite extends SQLViewSuite with TestHiveSingleton {
     }
   }
-  test("Add HiveVoidType to compatible with Hive void type") {
+  test("SPARK-20680: Add HiveVoidType to compatible with Hive void type") {
     withView("v1") {
+      sql("drop view if exists v1")
       sql("create view v1 as select null as c")
       val df = sql("select * from v1")
       assert(df.schema.fields.head.dataType == NullType)
       checkAnswer(

From 5150279f394f03e5cb4ea757dbdc1ca8417cdc28 Mon Sep 17 00:00:00 2001
From: ulysses
Date: Thu, 13 Aug 2020 19:29:21 +0800
Subject: [PATCH 5/8] single object

---
 .../org/apache/spark/sql/hive/client/HiveClientImpl.scala | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index c746de1b006a5..9ab9a7b21764b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -1276,13 +1276,11 @@ private[hive] object HiveClientImpl extends Logging {
   }
 }
-class HiveVoidType extends DataType {
+case object HiveVoidType extends DataType {
   override def defaultSize: Int = 1
-  override def asNullable: HiveVoidType = this
+  override def asNullable: DataType = HiveVoidType
   override def simpleString: String = "void"
-}
-case object HiveVoidType extends HiveVoidType {
   def replaceVoidType(dt: DataType): DataType = dt match {
     case ArrayType(et, nullable) =>
       ArrayType(replaceVoidType(et), nullable)
     case MapType(kt, vt, nullable) =>

From d068b42bac8f5121ff32739696583e3e7b809ed9 Mon Sep 17 00:00:00 2001
From: ulysses
Date: Thu, 13 Aug 2020 22:00:57 +0800
Subject: [PATCH 6/8] inner fields

---
 .../org/apache/spark/sql/hive/client/HiveClientImpl.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 9ab9a7b21764b..805bcb2bc3a60 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -1006,8 +1006,8 @@ private[hive] object HiveClientImpl extends Logging {
   /** Builds the native StructField from Hive's FieldSchema. */
   def fromHiveColumn(hc: FieldSchema): StructField = {
     val columnType = getSparkSQLDataType(hc)
-    val metadata = if (hc.getType != columnType.catalogString &&
-        hc.getType != HiveVoidType.catalogString) {
+    val replacedVoidType = HiveVoidType.replaceVoidType(columnType)
+    val metadata = if (hc.getType != replacedVoidType.catalogString) {
       new MetadataBuilder().putString(HIVE_TYPE_STRING, hc.getType).build()
     } else {
       Metadata.empty

From b2c152a514ef30c113419dde247c262ad323023f Mon Sep 17 00:00:00 2001
From: ulysses
Date: Thu, 13 Aug 2020 22:03:31 +0800
Subject: [PATCH 7/8] drop view

---
 .../test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala | 1 +
 .../org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala   | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
index 2e25c40cecd78..68031cf831fec 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
@@ -731,6 +731,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
           sql("CREATE DATABASE IF NOT EXISTS db2")
           sql("USE db2")
           checkAnswer(spark.table("default.v1"), Row(1))
+          sql("DROP VIEW v1")
         }
       }
     }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
index b78d891a6ae19..da7dfd05f33d6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
@@ -140,7 +140,6 @@ class HiveSQLViewSuite extends SQLViewSuite with TestHiveSingleton {
   test("SPARK-20680: Add HiveVoidType to compatible with Hive void type") {
     withView("v1") {
-      sql("drop view if exists v1")
       sql("create view v1 as select null as c")

From 57d8fd86c93caf34d1586175f96df173a6239946 Mon Sep 17 00:00:00 2001
From: ulysses
Date: Fri, 14 Aug 2020 08:00:55 +0800
Subject: [PATCH 8/8] fix v1

---
 .../scala/org/apache/spark/sql/execution/SQLViewSuite.scala | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
index 68031cf831fec..f3cae24527d60 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
@@ -723,7 +723,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
   test("sparkSession API view resolution with different default database") {
     withDatabase("db2") {
-      withView("v1") {
+      withView("default.v1") {
         withTable("t1") {
           sql("USE default")
           sql("CREATE TABLE t1 USING parquet AS SELECT 1 AS c0")
           sql("CREATE VIEW v1 AS SELECT * FROM t1")
           sql("CREATE DATABASE IF NOT EXISTS db2")
           sql("USE db2")
           checkAnswer(spark.table("default.v1"), Row(1))
-          sql("DROP VIEW v1")
         }
       }
     }
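
Illustrative sketch (not part of the patch series above): the object below mirrors the final shape HiveVoidType takes after patches 1-6 and shows the type string the recursive replacement produces for a nested schema. HiveVoidTypeSketch and VoidTypeDemo are hypothetical names used only for this example, and the snippet assumes it is compiled inside Spark's hive module, in the same org.apache.spark.sql.hive.client package where the patches define HiveVoidType, so that it can extend DataType the same way:

package org.apache.spark.sql.hive.client

import org.apache.spark.sql.types._

// Stand-in for the HiveVoidType added by this series: a DataType whose
// simpleString (and therefore catalogString) is "void", the name Hive accepts.
case object HiveVoidTypeSketch extends DataType {
  override def defaultSize: Int = 1
  override def asNullable: DataType = this
  override def simpleString: String = "void"

  // Recursively swap NullType for the void type, including inside arrays,
  // maps and struct fields, so the whole catalogString becomes Hive-parsable.
  def replaceVoidType(dt: DataType): DataType = dt match {
    case ArrayType(et, containsNull) =>
      ArrayType(replaceVoidType(et), containsNull)
    case MapType(kt, vt, valueContainsNull) =>
      MapType(replaceVoidType(kt), replaceVoidType(vt), valueContainsNull)
    case StructType(fields) =>
      StructType(fields.map(f => f.copy(dataType = replaceVoidType(f.dataType))))
    case _: NullType => HiveVoidTypeSketch
    case other => other
  }
}

object VoidTypeDemo {
  def main(args: Array[String]): Unit = {
    val schema = StructType(Seq(
      StructField("c1", NullType),
      StructField("c2", ArrayType(NullType, containsNull = true))))
    // Prints struct<c1:void,c2:array<void>>: the string toHiveColumn would now
    // hand to Hive for a column of this struct type. Without the replacement it
    // would be struct<c1:null,c2:array<null>>, which Hive cannot parse back.
    println(HiveVoidTypeSketch.replaceVoidType(schema).catalogString)
  }
}

The same replacement is what patch 6 reuses in fromHiveColumn: the raw Hive type string is only stored in the column metadata under HIVE_TYPE_STRING when it differs from the void-replaced catalogString, and the HiveSQLViewSuite test added in patch 1 checks that a view column created as NULL still comes back to Spark as NullType.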