From 92474c5a142fb9db2c86549c8347f910fc01fcbd Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 28 Aug 2016 15:28:15 -0700 Subject: [PATCH 1/2] remove stats-related props --- .../spark/sql/execution/command/tables.scala | 6 +++-- .../sql/hive/client/HiveClientImpl.scala | 4 ++- .../spark/sql/hive/ShowCreateTableSuite.scala | 27 ++++++++++++------- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index b4a15b8b2882e..5201b2c460ff3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -794,8 +794,10 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman private def showHiveTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { if (metadata.properties.nonEmpty) { val filteredProps = metadata.properties.filterNot { - // Skips "EXTERNAL" property for external tables - case (key, _) => key == "EXTERNAL" && metadata.tableType == EXTERNAL + // Skips all the stats info (See the JIRA: HIVE-13792) + case (key, _) => + key == "numFiles" || key == "numRows" || key == "totalSize" || key == "numPartitions" || + key == "rawDataSize" || key == "COLUMN_STATS_ACCURATE" } val props = filteredProps.map { case (key, value) => diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 81d5a124e9d4a..bc40ec24bfae1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -400,7 +400,9 @@ private[hive] class HiveClientImpl( properties = Option(h.getTTable.getSd.getSerdeInfo.getParameters) .map(_.asScala.toMap).orNull ), - 
properties = properties.filter(kv => kv._1 != "comment"), + // For EXTERNAL_TABLE, the table properties have a particular field "EXTERNAL". This is added + // in the function toHiveTable. + properties = properties.filter(kv => kv._1 != "comment" && kv._1 != "EXTERNAL"), comment = properties.get("comment"), viewOriginalText = Option(h.getViewOriginalText), viewText = Option(h.getViewExpandedText), diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala index e925921165d6a..762435b1b2066 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala @@ -277,12 +277,24 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing checkCreateTableOrView(TableIdentifier(table, Some("default")), "VIEW") } + // These table properties should not be included in the output statement of SHOW CREATE TABLE + val excludedTableProperties = Set( + // The following are hive-generated statistics fields + "COLUMN_STATS_ACCURATE", + "numFiles", + "numPartitions", + "numRows", + "rawDataSize", + "totalSize" + ) + private def checkCreateTableOrView(table: TableIdentifier, checkType: String): Unit = { val db = table.database.getOrElse("default") val expected = spark.sharedState.externalCatalog.getTable(db, table.table) val shownDDL = sql(s"SHOW CREATE TABLE ${table.quotedString}").head().getString(0) sql(s"DROP $checkType ${table.quotedString}") + checkExcludedTableProperties(shownDDL) try { sql(shownDDL) val actual = spark.sharedState.externalCatalog.getTable(db, table.table) @@ -292,6 +304,10 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing } } + private def checkExcludedTableProperties(shownDDL: String): Unit = { + excludedTableProperties.foreach(p => assert(!shownDDL.contains(p))) + } + private def 
checkCatalogTables(expected: CatalogTable, actual: CatalogTable): Unit = { def normalize(table: CatalogTable): CatalogTable = { val nondeterministicProps = Set( @@ -302,18 +318,11 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing "last_modified_by", "last_modified_time", "Owner:", - "COLUMN_STATS_ACCURATE", // The following are hive specific schema parameters which we do not need to match exactly. - "numFiles", - "numRows", - "rawDataSize", - "totalSize", "totalNumberFiles", "maxFileSize", - "minFileSize", - // EXTERNAL is not non-deterministic, but it is filtered out for external tables. - "EXTERNAL" - ) + "minFileSize" + ) ++ excludedTableProperties table.copy( createTime = 0L, From ce8e8b89a5b61648daaa59578e2b6a99ec2f6d74 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 28 Aug 2016 22:24:05 -0700 Subject: [PATCH 2/2] address comments --- .../spark/sql/execution/command/tables.scala | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 5201b2c460ff3..8421357c66e38 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -791,13 +791,22 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman } } + // These table properties should not be included in the output statement of SHOW CREATE TABLE + val excludedTableProperties = Set( + // The following are hive-generated statistics fields + "COLUMN_STATS_ACCURATE", + "numFiles", + "numPartitions", + "numRows", + "rawDataSize", + "totalSize" + ) + private def showHiveTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { if (metadata.properties.nonEmpty) { val filteredProps = metadata.properties.filterNot { // Skips all the stats info (See 
the JIRA: HIVE-13792) - case (key, _) => - key == "numFiles" || key == "numRows" || key == "totalSize" || key == "numPartitions" || - key == "rawDataSize" || key == "COLUMN_STATS_ACCURATE" + case (key, _) => excludedTableProperties.contains(key) } val props = filteredProps.map { case (key, value) =>