diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index b4a15b8b2882e..8421357c66e38 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -791,11 +791,22 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman } } + // These table properties should not be included in the output statement of SHOW CREATE TABLE + val excludedTableProperties = Set( + // The following are Hive-generated statistics fields + "COLUMN_STATS_ACCURATE", + "numFiles", + "numPartitions", + "numRows", + "rawDataSize", + "totalSize" + ) + private def showHiveTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { if (metadata.properties.nonEmpty) { val filteredProps = metadata.properties.filterNot { - // Skips "EXTERNAL" property for external tables - case (key, _) => key == "EXTERNAL" && metadata.tableType == EXTERNAL + // Skips the Hive-generated statistics properties (see HIVE-13792) + case (key, _) => excludedTableProperties.contains(key) } val props = filteredProps.map { case (key, value) => diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 81d5a124e9d4a..bc40ec24bfae1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -400,7 +400,9 @@ private[hive] class HiveClientImpl( properties = Option(h.getTTable.getSd.getSerdeInfo.getParameters) .map(_.asScala.toMap).orNull ), - properties = properties.filter(kv => kv._1 != "comment"), + // For EXTERNAL_TABLE, the table properties have a special field "EXTERNAL". This is added + // in the function toHiveTable, so it is filtered out here. 
+ properties = properties.filter(kv => kv._1 != "comment" && kv._1 != "EXTERNAL"), comment = properties.get("comment"), viewOriginalText = Option(h.getViewOriginalText), viewText = Option(h.getViewExpandedText), diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala index e925921165d6a..762435b1b2066 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala @@ -277,12 +277,24 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing checkCreateTableOrView(TableIdentifier(table, Some("default")), "VIEW") } + // These table properties should not be included in the output statement of SHOW CREATE TABLE + val excludedTableProperties = Set( + // The following are Hive-generated statistics fields + "COLUMN_STATS_ACCURATE", + "numFiles", + "numPartitions", + "numRows", + "rawDataSize", + "totalSize" + ) + private def checkCreateTableOrView(table: TableIdentifier, checkType: String): Unit = { val db = table.database.getOrElse("default") val expected = spark.sharedState.externalCatalog.getTable(db, table.table) val shownDDL = sql(s"SHOW CREATE TABLE ${table.quotedString}").head().getString(0) sql(s"DROP $checkType ${table.quotedString}") + checkExcludedTableProperties(shownDDL) try { sql(shownDDL) val actual = spark.sharedState.externalCatalog.getTable(db, table.table) @@ -292,6 +304,10 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing } } + private def checkExcludedTableProperties(shownDDL: String): Unit = { + excludedTableProperties.foreach(p => assert(!shownDDL.contains(p))) + } + private def checkCatalogTables(expected: CatalogTable, actual: CatalogTable): Unit = { def normalize(table: CatalogTable): CatalogTable = { val nondeterministicProps = Set( @@ -302,18 +318,11 @@ class 
ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing "last_modified_by", "last_modified_time", "Owner:", - "COLUMN_STATS_ACCURATE", // The following are hive specific schema parameters which we do not need to match exactly. - "numFiles", - "numRows", - "rawDataSize", - "totalSize", "totalNumberFiles", "maxFileSize", - "minFileSize", - // EXTERNAL is not non-deterministic, but it is filtered out for external tables. - "EXTERNAL" - ) + "minFileSize" + ) ++ excludedTableProperties table.copy( createTime = 0L,