From fb97896a92c0d96ec84c7476123af4e497c9ce76 Mon Sep 17 00:00:00 2001
From: Cheng Pan
Date: Fri, 29 Aug 2025 02:47:52 +0800
Subject: [PATCH 1/2] [SPARK-53422][SQL][TEST] Make SPARK-30269 test robust

---
 .../test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
index 2af4d01fcfb80..9323dd4c2679e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -1616,11 +1616,10 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
         Seq(tbl, ext_tbl).foreach { tblName =>
           sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')")
-          val expectedSize = 690
 
           // analyze table
           sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN")
           var tableStats = getTableStats(tblName)
-          assert(tableStats.sizeInBytes == expectedSize)
+          val expectedSize = tableStats.sizeInBytes
           assert(tableStats.rowCount.isEmpty)
 
           sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS")

From beee306c891bf87768e31bb2ba6bd530052355a1 Mon Sep 17 00:00:00 2001
From: Cheng Pan
Date: Fri, 29 Aug 2025 11:12:30 +0800
Subject: [PATCH 2/2] assert stats is none before executing ANALYZE TABLE

---
 .../spark/sql/hive/StatisticsSuite.scala | 69 ++++++++++---------
 1 file changed, 36 insertions(+), 33 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
index 9323dd4c2679e..80a213c9466b1 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -1602,41 +1602,44 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
     val tbl = "SPARK_30269"
     val ext_tbl = "SPARK_30269_external"
     withTempDir { dir =>
-      withTable(tbl, ext_tbl) {
-        sql(s"CREATE TABLE $tbl (key INT, value STRING, ds STRING)" +
-          "USING parquet PARTITIONED BY (ds)")
-        sql(
-          s"""
-             | CREATE TABLE $ext_tbl (key INT, value STRING, ds STRING)
-             | USING PARQUET
-             | PARTITIONED BY (ds)
-             | LOCATION '${dir.toURI}'
+      withSQLConf(SQLConf.AUTO_SIZE_UPDATE_ENABLED.key -> "false") {
+        withTable(tbl, ext_tbl) {
+          sql(s"CREATE TABLE $tbl (key INT, value STRING, ds STRING)" +
+            "USING parquet PARTITIONED BY (ds)")
+          sql(
+            s"""
+               | CREATE TABLE $ext_tbl (key INT, value STRING, ds STRING)
+               | USING PARQUET
+               | PARTITIONED BY (ds)
+               | LOCATION '${dir.toURI}'
            """.stripMargin)
 
-        Seq(tbl, ext_tbl).foreach { tblName =>
-          sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')")
-
-          // analyze table
-          sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN")
-          var tableStats = getTableStats(tblName)
-          val expectedSize = tableStats.sizeInBytes
-          assert(tableStats.rowCount.isEmpty)
-
-          sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS")
-          tableStats = getTableStats(tblName)
-          assert(tableStats.sizeInBytes == expectedSize)
-          assert(tableStats.rowCount.get == 1)
-
-          // analyze a single partition
-          sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS NOSCAN")
-          var partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
-          assert(partStats.sizeInBytes == expectedSize)
-          assert(partStats.rowCount.isEmpty)
-
-          sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS")
-          partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
-          assert(partStats.sizeInBytes == expectedSize)
-          assert(partStats.rowCount.get == 1)
+          Seq(tbl, ext_tbl).foreach { tblName =>
+            sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')")
+            assert(getCatalogTable(tblName).stats.isEmpty)
+
+            // analyze table
+            sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN")
+            var tableStats = getTableStats(tblName)
+            val expectedSize = tableStats.sizeInBytes
+            assert(tableStats.rowCount.isEmpty)
+
+            sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS")
+            tableStats = getTableStats(tblName)
+            assert(tableStats.sizeInBytes == expectedSize)
+            assert(tableStats.rowCount.get == 1)
+
+            // analyze a single partition
+            sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS NOSCAN")
+            var partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
+            assert(partStats.sizeInBytes == expectedSize)
+            assert(partStats.rowCount.isEmpty)
+
+            sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS")
+            partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
+            assert(partStats.sizeInBytes == expectedSize)
+            assert(partStats.rowCount.get == 1)
+          }
         }
       }
     }
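
For readers who want to exercise the same pattern outside the suite's TestHiveSingleton harness, here is a minimal standalone sketch; it is an editorial illustration, not part of the patch. The SparkSession setup, the object name AnalyzeStatsSketch, and the table name spark_30269_sketch are assumptions; the catalog reads are comparable to what the suite's getTableStats helper returns. The point mirrors the first commit: capture the size reported by ANALYZE ... NOSCAN as the baseline rather than hard-coding a byte count such as 690, which can drift with Parquet writer versions or compression defaults.

// Editorial sketch only; assumes spark-hive on the classpath and a local metastore.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogStatistics

object AnalyzeStatsSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("analyze-stats-sketch")
      .enableHiveSupport()
      .getOrCreate()

    val tbl = "spark_30269_sketch" // hypothetical table name
    spark.sql(s"DROP TABLE IF EXISTS $tbl")
    spark.sql(
      s"CREATE TABLE $tbl (key INT, value STRING, ds STRING) USING parquet PARTITIONED BY (ds)")
    spark.sql(s"INSERT INTO $tbl VALUES (1, 'a', '2019-12-13')")

    // Read table-level statistics straight from the session catalog.
    def tableStats(name: String): Option[CatalogStatistics] =
      spark.sessionState.catalog.getTableMetadata(TableIdentifier(name)).stats

    // With size auto-update left at its default (false), the INSERT alone
    // records no statistics, mirroring the patch's pre-ANALYZE assertion.
    assert(tableStats(tbl).isEmpty)

    // NOSCAN fills in sizeInBytes only; capture it as the baseline rather than
    // hard-coding a byte count like 690, which depends on the file layout.
    spark.sql(s"ANALYZE TABLE $tbl COMPUTE STATISTICS NOSCAN")
    val afterNoscan = tableStats(tbl).get
    val expectedSize = afterNoscan.sizeInBytes
    assert(afterNoscan.rowCount.isEmpty)

    // A full ANALYZE must agree on the size and add the row count.
    spark.sql(s"ANALYZE TABLE $tbl COMPUTE STATISTICS")
    val afterScan = tableStats(tbl).get
    assert(afterScan.sizeInBytes == expectedSize)
    assert(afterScan.rowCount.contains(BigInt(1)))

    spark.stop()
  }
}

The sketch also assumes spark.sql.statistics.size.autoUpdate.enabled stays at its default of false, which is why the second commit pins SQLConf.AUTO_SIZE_UPDATE_ENABLED to "false" around the test: with auto-update on, the INSERT itself would record a size in the catalog and the new assert(getCatalogTable(tblName).stats.isEmpty) check would not hold.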