diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 4f62c3b875ae2c..3a6344e55478e6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -544,7 +544,9 @@ public void createTaskForEachColumns(AnalysisInfo jobInfo, Map> colToParts = new HashMap<>(); + colToParts.put(colName, entry.getValue()); + colTaskInfoBuilder.setColToPartitions(colToParts); } AnalysisInfo analysisInfo = colTaskInfoBuilder.setColName(colName).setIndexId(indexId) .setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index cdd5c3fc69f139..27101a1d66fa1f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -58,8 +58,8 @@ public abstract class BaseAnalysisTask { + " COUNT(1) AS `row_count`, " + " NDV(`${colName}`) AS `ndv`, " + " COUNT(1) - COUNT(`${colName}`) AS `null_count`, " - + " CAST(MIN(`${colName}`) AS STRING) AS `min`, " - + " CAST(MAX(`${colName}`) AS STRING) AS `max`, " + + " SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) AS `min`, " + + " SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) AS `max`, " + " ${dataSizeFunction} AS `data_size`, " + " NOW() AS `update_time` " + " FROM `${catalogName}`.`${dbName}`.`${tblName}`"; @@ -75,8 +75,8 @@ public abstract class BaseAnalysisTask { + "${rowCount} AS `row_count`, " + "${ndvFunction} as `ndv`, " + "ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) * ${scaleFactor}) AS `null_count`, " - + "${min} AS `min`, " - + "${max} AS `max`, " + + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, " + + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, " + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, " + "NOW() " + "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${sampleHints} ${limit}"; @@ -92,8 +92,8 @@ public abstract class BaseAnalysisTask { + "${rowCount} AS `row_count`, " + "${ndvFunction} as `ndv`, " + "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * ${scaleFactor} as `null_count`, " - + "${min} AS `min`, " - + "${max} AS `max`, " + + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, " + + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, " + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, " + "NOW() " + "FROM ( " @@ -115,8 +115,8 @@ public abstract class BaseAnalysisTask { + "${row_count} AS `row_count`, " + "${ndv} AS `ndv`, " + "${null_count} AS `null_count`, " - + "${min} AS `min`, " - + "${max} AS `max`, " + + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, " + + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, " + "${data_size} AS `data_size`, " + "NOW() "; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java index 50c437fa8f9945..2bf72843a71f5e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java @@ -41,8 +41,8 @@ public class JdbcAnalysisTask extends BaseAnalysisTask { + "COUNT(1) AS row_count, " + "NDV(`${colName}`) AS ndv, " + "SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) AS null_count, " - + "MIN(`${colName}`) AS min, " - + "MAX(`${colName}`) AS max, " + + "SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) AS min, " + + "SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) AS max, " + "${dataSizeFunction} AS data_size, " + "NOW() " + "FROM `${catalogName}`.`${dbName}`.`${tblName}`"; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index bf144b1a1160f4..89ed24e7904a86 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -46,8 +46,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask { private static final String BASIC_STATS_TEMPLATE = "SELECT " - + "MIN(`${colName}`) as min, " - + "MAX(`${colName}`) as max " + + "SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) as min, " + + "SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) as max " + "FROM `${dbName}`.`${tblName}`"; @VisibleForTesting diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java index ed9122f70b6560..5b27c79c8633ed 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java @@ -150,7 +150,20 @@ public ResultRow collectBasicStat(AutoCloseConnectContext context) { @Mock public void runQuery(String sql) { - Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); + Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') " + + "AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, " + + "-1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS" + + " `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`)" + + " - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0))" + + " * SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key`" + + " IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, " + + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`," + + " SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, " + + "SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() " + + "FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) " + + "as `count` FROM (SELECT `${colName}` FROM " + + "`catalogName`.`${dbName}`.`${tblName}` " + + " limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); return; } }; @@ -216,7 +229,16 @@ public ResultRow collectBasicStat(AutoCloseConnectContext context) { @Mock public void runQuery(String sql) { - Assertions.assertEquals(" SELECT CONCAT(30001, '-', -1, '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, ROUND(NDV(`${colName}`) * 5.0) as `ndv`, ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) * 5.0) AS `null_count`, '1' AS `min`, '2' AS `max`, SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`, NOW() FROM `catalogName`.`${dbName}`.`${tblName}` limit 100", sql); + Assertions.assertEquals(" " + + "SELECT CONCAT(30001, '-', -1, '-', 'null') AS `id`, " + + "10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, " + + "-1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, " + + "500 AS `row_count`, ROUND(NDV(`${colName}`) * 5.0) as `ndv`, " + + "ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) * 5.0) " + + "AS `null_count`, SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, " + + "SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, " + + "SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`, NOW() " + + "FROM `catalogName`.`${dbName}`.`${tblName}` limit 100", sql); return; } }; @@ -289,7 +311,18 @@ public ResultRow collectBasicStat(AutoCloseConnectContext context) { @Mock public void runQuery(String sql) { - Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); + System.out.println(sql); + Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') " + + "AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, " + + "-1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`," + + " 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) " + + "- SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * " + + "SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` " + + "IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, " + + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, " + + "SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, " + + "SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() " + + "FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); return; } }; diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 333772972ece9a..9a06ed772b5b36 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -95,7 +95,7 @@ suite("test_analyze") { sql """ SET enable_nereids_planner=true; - + """ sql """ @@ -124,7 +124,7 @@ suite("test_analyze") { Thread.sleep(1000 * 60) sql """ - SELECT * FROM ${tbl}; + SELECT * FROM ${tbl}; """ sql """ @@ -135,7 +135,7 @@ suite("test_analyze") { try { sql """ - SELECT * FROM ${tbl}; + SELECT * FROM ${tbl}; """ } catch (Exception e) { exception = e @@ -150,7 +150,7 @@ suite("test_analyze") { """ sql """ - SELECT * FROM ${tbl}; + SELECT * FROM ${tbl}; """ sql """ @@ -159,7 +159,7 @@ suite("test_analyze") { try { sql """ - SELECT * FROM ${tbl}; + SELECT * FROM ${tbl}; """ } catch (Exception e) { exception = e @@ -268,7 +268,7 @@ suite("test_analyze") { """ sql """ - + CREATE TAbLE IF NOT EXISTS test_600_partition_table_analyze ( `id` INT NOT NULL ) ENGINE=OLAP @@ -882,7 +882,7 @@ PARTITION `p599` VALUES IN (599) ( "replication_num" = "1" ); - + """ sql """ @@ -932,7 +932,7 @@ PARTITION `p599` VALUES IN (599) ( PARTITION `p1` VALUES LESS THAN ('5') ) - + DISTRIBUTED BY HASH(`id`) BUCKETS 3 PROPERTIES ( "replication_num"="1" @@ -1107,7 +1107,7 @@ PARTITION `p599` VALUES IN (599) DISTRIBUTED BY HASH(`col1`) BUCKETS 3 PROPERTIES ( "replication_allocation" = "tag.location.default: 1" - ); + ); """ sql """insert into test_meta_management values(1, 2, 3);""" @@ -1197,7 +1197,7 @@ PARTITION `p599` VALUES IN (599) DISTRIBUTED BY HASH(`col1`) BUCKETS 3 PROPERTIES ( "replication_allocation" = "tag.location.default: 1" - ); + ); """ sql """INSERT INTO test_max_min_lit VALUES("\\'")""" @@ -1244,6 +1244,7 @@ PARTITION `p599` VALUES IN (599) assert all_finished(show_result) + // Test truncate table will drop table stats too. sql """ANALYZE TABLE ${tbl} WITH SYNC""" def result_before_truncate = sql """show column stats ${tbl}""" @@ -1254,4 +1255,26 @@ PARTITION `p599` VALUES IN (599) result_after_truncate = sql """show column cached stats ${tbl}""" assertEquals(0, result_after_truncate.size()) + + + sql """ + delete from ${tbl} where analyzetestlimitedk3 >= -2147483648 + """ + sql """ + INSERT INTO `${tbl}` VALUES (-2103297891,1,101,15248,4761818404925265645,939926.283, + 'UTmCFKMbprf0zSVOIlBJRNOl3JcNBdOsnCDt','2022-09-28','2022-10-28 01:56:56','tVvGDSrN6kyn', + -954349107.187117,-40.46286,'1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111g1ZP9nqVgaGKya3kPERdBofTWJQ4TIJEz972Xvw4hfPpTpWwlmondiLVTCyld7rSBlSWrE7NJRB0pvPGEFQKOx1s3', + '-1559301292834325905', NULL, NULL, NULL, NULL) + """ + + sql """ + ANALYZE TABLE ${tbl} WITH SYNC + """ + + def truncate_test_result = sql """ + SHOW COLUMN CACHED STATS ${tbl}(analyzetestlimitedk12) + """ + assert "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111" == truncate_test_result[0][6].substring(1, 1025) + assert "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111" == truncate_test_result[0][7].substring(1, 1025) + }