Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2282,7 +2282,7 @@ public class Config extends ConfigBase {
@ConfField(mutable = true, masterOnly = false, description = {
"Hive行数估算分区采样数",
"Sample size for hive row count estimation."})
public static int hive_stats_partition_sample_size = 3000;
public static int hive_stats_partition_sample_size = 30;

@ConfField(mutable = true, masterOnly = true, description = {
"启用Hive分桶表",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -875,7 +875,7 @@ private long getRowCountFromFileList() {
}

int totalPartitionSize = partitionValues == null ? 1 : partitionValues.getIdToPartitionItem().size();
if (samplePartitionSize < totalPartitionSize) {
if (samplePartitionSize != 0 && samplePartitionSize < totalPartitionSize) {
totalSize = totalSize * totalPartitionSize / samplePartitionSize;
}
return totalSize / estimatedRowSize;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ public final class GlobalVariable {
+ "Getting file list may be a time-consuming operation. "
+ "If you don't need to estimate the number of rows in the table "
+ "or it affects performance, you can disable this feature."})
public static boolean enable_get_row_count_from_file_list = false;
public static boolean enable_get_row_count_from_file_list = true;

@VariableMgr.VarAttr(name = READ_ONLY, flag = VariableMgr.GLOBAL,
description = {"仅用于兼容MySQL生态,暂无实际意义",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@
suite("test_hive_partition_column_analyze", "p0,external,hive,external_docker,external_docker_hive") {
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
logger.info("diable Hive test.")
logger.info("disable Hive test.")
return;
}

def enable = sql """show variables like "enable_partition_analyze" """
if (enable.size() != 1 || enable[0][1].equalsIgnoreCase("false")) {
logger.info("partition analyze disabled. " + enable)
return;
}

Expand All @@ -36,161 +42,168 @@ suite("test_hive_partition_column_analyze", "p0,external,hive,external_docker,ex
"""
logger.info("catalog " + catalog_name + " created")

try {
sql """set global enable_get_row_count_from_file_list=true"""

sql """switch ${catalog_name};"""
logger.info("switched to catalog " + catalog_name)

sql """analyze table ${catalog_name}.partition_type.tinyint_partition (tinyint_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.smallint_partition (smallint_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.int_partition (int_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.bigint_partition (bigint_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.char_partition (char_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.varchar_partition (varchar_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.string_partition (string_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.date_partition (date_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.float_partition (float_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.double_partition (double_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.decimal_partition (decimal_part) with sync"""

sql """use partition_type;"""

result = sql """show column stats tinyint_partition (tinyint_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "tinyint_part")
assertEquals(result[0][2], "141474.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "141474.0")
assertEquals(result[0][6], "1.0")
assertEquals(result[0][7], "1")
assertEquals(result[0][8], "100")

result = sql """show column stats smallint_partition (smallint_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "smallint_part")
assertEquals(result[0][2], "141474.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "282948.0")
assertEquals(result[0][6], "2.0")
assertEquals(result[0][7], "1")
assertEquals(result[0][8], "100")

result = sql """show column stats int_partition (int_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "int_part")
assertEquals(result[0][2], "141474.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "565896.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "1")
assertEquals(result[0][8], "100")

result = sql """show column stats bigint_partition (bigint_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "bigint_part")
assertEquals(result[0][2], "141474.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "1131792.0")
assertEquals(result[0][6], "8.0")
assertEquals(result[0][7], "1")
assertEquals(result[0][8], "100")

result = sql """show column stats char_partition (char_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "char_part")
assertEquals(result[0][2], "141474.0")
assertEquals(result[0][3], "101.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "2829480.0")
assertEquals(result[0][6], "20.0")
assertEquals(result[0][7], "\'1 \'")
assertEquals(result[0][8], "\'a \'")

result = sql """show column stats varchar_partition (varchar_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "varchar_part")
assertEquals(result[0][2], "141474.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "271630.0")
assertEquals(result[0][6], "1.9199994345250717")
assertEquals(result[0][7], "\'1\'")
assertEquals(result[0][8], "\'99\'")

result = sql """show column stats string_partition (string_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "string_part")
assertEquals(result[0][2], "141474.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "271630.0")
assertEquals(result[0][6], "1.9199994345250717")
assertEquals(result[0][7], "\'1\'")
assertEquals(result[0][8], "\'99\'")

result = sql """show column stats date_partition (date_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "date_part")
assertEquals(result[0][2], "141474.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "565896.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "\'2001-10-12\'")
assertEquals(result[0][8], "\'2100-10-12\'")

result = sql """show column stats float_partition (float_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "float_part")
assertEquals(result[0][2], "141474.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "565896.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "296.3103")
assertEquals(result[0][8], "32585.627")

result = sql """show column stats double_partition (double_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "double_part")
assertEquals(result[0][2], "141474.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "1131792.0")
assertEquals(result[0][6], "8.0")
assertEquals(result[0][7], "115.14474")
assertEquals(result[0][8], "32761.14458")

result = sql """show column stats decimal_partition (decimal_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "decimal_part")
assertEquals(result[0][2], "141474.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "1131792.0")
assertEquals(result[0][6], "8.0")
assertEquals(result[0][7], "243.2868")
assertEquals(result[0][8], "32527.1543")

sql """analyze table ${catalog_name}.partition_type.decimal_partition (decimal_part) with sync with sql"""
result = sql """show column stats decimal_partition (decimal_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "decimal_part")
assertEquals(result[0][2], "100000.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "800000.0")
assertEquals(result[0][6], "8.0")
assertEquals(result[0][7], "243.2868")
assertEquals(result[0][8], "32527.1543")
} finally {
sql """set global enable_get_row_count_from_file_list=false"""
}
sql """set global enable_get_row_count_from_file_list=true"""
sql """switch ${catalog_name};"""
logger.info("switched to catalog " + catalog_name)

sql """analyze table ${catalog_name}.partition_type.tinyint_partition (tinyint_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.smallint_partition (smallint_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.int_partition (int_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.bigint_partition (bigint_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.char_partition (char_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.varchar_partition (varchar_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.string_partition (string_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.date_partition (date_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.float_partition (float_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.double_partition (double_part) with sync"""
sql """analyze table ${catalog_name}.partition_type.decimal_partition (decimal_part) with sync"""

sql """use partition_type;"""

result = sql """show column stats tinyint_partition (tinyint_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "tinyint_part")
assertEquals(result[0][1], "tinyint_partition")
assertEquals(result[0][2], "100000.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "100000.0")
assertEquals(result[0][6], "1.0")
assertEquals(result[0][7], "1")
assertEquals(result[0][8], "100")

result = sql """show column stats smallint_partition (smallint_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "smallint_part")
assertEquals(result[0][1], "smallint_partition")
assertEquals(result[0][2], "100000.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "200000.0")
assertEquals(result[0][6], "2.0")
assertEquals(result[0][7], "1")
assertEquals(result[0][8], "100")

result = sql """show column stats int_partition (int_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "int_part")
assertEquals(result[0][1], "int_partition")
assertEquals(result[0][2], "100000.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400000.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "1")
assertEquals(result[0][8], "100")

result = sql """show column stats bigint_partition (bigint_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "bigint_part")
assertEquals(result[0][1], "bigint_partition")
assertEquals(result[0][2], "100000.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "800000.0")
assertEquals(result[0][6], "8.0")
assertEquals(result[0][7], "1")
assertEquals(result[0][8], "100")

result = sql """show column stats char_partition (char_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "char_part")
assertEquals(result[0][1], "char_partition")
assertEquals(result[0][2], "100000.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "2000000.0")
assertEquals(result[0][6], "20.0")
assertEquals(result[0][7], "\'1 \'")
assertEquals(result[0][8], "\'99 \'")

result = sql """show column stats varchar_partition (varchar_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "varchar_part")
assertEquals(result[0][1], "varchar_partition")
assertEquals(result[0][2], "100000.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "192000.0")
assertEquals(result[0][6], "1.92")
assertEquals(result[0][7], "\'1\'")
assertEquals(result[0][8], "\'99\'")

result = sql """show column stats string_partition (string_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "string_part")
assertEquals(result[0][1], "string_partition")
assertEquals(result[0][2], "100000.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "192000.0")
assertEquals(result[0][6], "1.92")
assertEquals(result[0][7], "\'1\'")
assertEquals(result[0][8], "\'99\'")

result = sql """show column stats date_partition (date_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "date_part")
assertEquals(result[0][1], "date_partition")
assertEquals(result[0][2], "100000.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400000.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "\'2001-10-12\'")
assertEquals(result[0][8], "\'2100-10-12\'")

result = sql """show column stats float_partition (float_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "float_part")
assertEquals(result[0][1], "float_partition")
assertEquals(result[0][2], "100000.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400000.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "296.3103")
assertEquals(result[0][8], "32585.627")

result = sql """show column stats double_partition (double_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "double_part")
assertEquals(result[0][1], "double_partition")
assertEquals(result[0][2], "100000.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "800000.0")
assertEquals(result[0][6], "8.0")
assertEquals(result[0][7], "115.14474")
assertEquals(result[0][8], "32761.14458")

result = sql """show column stats decimal_partition (decimal_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "decimal_part")
assertEquals(result[0][1], "decimal_partition")
assertEquals(result[0][2], "100000.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "800000.0")
assertEquals(result[0][6], "8.0")
assertEquals(result[0][7], "243.2868")
assertEquals(result[0][8], "32527.1543")

sql """analyze table ${catalog_name}.partition_type.decimal_partition (decimal_part) with sync with sql"""
result = sql """show column stats decimal_partition (decimal_part)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "decimal_part")
assertEquals(result[0][1], "decimal_partition")
assertEquals(result[0][2], "100000.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "800000.0")
assertEquals(result[0][6], "8.0")
assertEquals(result[0][7], "243.2868")
assertEquals(result[0][8], "32527.1543")
sql """drop catalog ${catalog_name}"""
}
}
Expand Down