Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public class ShowTableStatsStmt extends ShowStmt {
new ImmutableList.Builder<String>()
.add("updated_rows")
.add("query_times")
.add("row_count(for external_table only)")
.add("row_count")
.add("method")
.add("type")
.add("updated_time")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,8 @@ private Map<String, Set<String>> validateAndGetPartitions(TableIf table, Set<Str
return columnToPartitions;
}

// Make sure the job's colName contains every column specified by this AnalyzeStmt, regardless of whether each
// column will actually be analyzed.
@VisibleForTesting
public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlException {
AnalysisInfoBuilder infoBuilder = new AnalysisInfoBuilder();
Expand Down Expand Up @@ -733,7 +735,6 @@ public void dropStats(DropStatsStmt dropStatsStmt) throws DdlException {
}
logCreateTableStats(tableStats);
StatisticsRepository.dropStatistics(tblId, cols);

}

public void handleKillAnalyzeStmt(KillAnalysisJobStmt killAnalysisJobStmt) throws DdlException {
Expand Down Expand Up @@ -1044,4 +1045,13 @@ protected SimpleQueue<AnalysisInfo> createSimpleQueue(Collection<AnalysisInfo> c
}, null);
}

// Remove a column's stats status from TableStats if loading its stats failed after the column was analyzed, so that
// the column is analyzed again the next time a user or the system submits a job for it.
public void removeColStatsStatus(long tblId, String colName) {
    TableStats status = findTableStatsStatus(tblId);
    // Nothing to clear when no stats status is found for this table.
    if (status == null) {
        return;
    }
    status.removeColumn(colName);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,11 @@ protected void afterExecution() {
if (killed) {
return;
}
Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tbl.getId(), -1, col.getName());
long tblId = tbl.getId();
String colName = col.getName();
if (!Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tblId, -1, colName)) {
Env.getCurrentEnv().getAnalysisManager().removeColStatsStatus(tblId, colName);
}
}

protected void setTaskStateToRunning() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.apache.doris.common.AnalysisException;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import com.google.gson.annotations.SerializedName;
import org.apache.logging.log4j.LogManager;
Expand Down Expand Up @@ -136,7 +135,9 @@ public static ColumnStatistic fromResultRow(List<ResultRow> resultRows) {
LOG.debug("Failed to deserialize column stats", t);
return ColumnStatistic.UNKNOWN;
}
Preconditions.checkState(columnStatistic != null, "Column stats is null");
if (columnStatistic == null) {
return ColumnStatistic.UNKNOWN;
}
columnStatistic.partitionIdToColStats.putAll(partitionIdToColStats);
return columnStatistic;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ public class StatisticConstants {

public static final int STATISTICS_CACHE_REFRESH_INTERVAL = 24 * 2;

public static final int ROW_COUNT_CACHE_VALID_DURATION_IN_HOURS = 12;

/**
* Bucket count for the column_statistics and analysis_job tables.
*/
Expand All @@ -59,12 +57,6 @@ public class StatisticConstants {

public static final int HISTOGRAM_MAX_BUCKET_NUM = 128;

/**
* The health of the table indicates the health of the table statistics, ranging in [0, 100].
* Statistics are automatically re-collected when the health falls below this threshold. TODO make it in fe.conf
*/
public static final int TABLE_STATS_HEALTH_THRESHOLD = 80;

public static final int ANALYZE_MANAGER_INTERVAL_IN_SECS = 60;

public static List<String> STATISTICS_DB_BLACK_LIST = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,17 +212,20 @@ private void doPreHeat() {
}
}

public void syncLoadColStats(long tableId, long idxId, String colName) {
/**
* Returns false if loading the corresponding column stats failed.
*/
public boolean syncLoadColStats(long tableId, long idxId, String colName) {
List<ResultRow> columnResults = StatisticsRepository.loadColStats(tableId, idxId, colName);
final StatisticsCacheKey k =
new StatisticsCacheKey(tableId, idxId, colName);
final ColumnStatistic c = ColumnStatistic.fromResultRow(columnResults);
if (c == ColumnStatistic.UNKNOWN) {
return;
return false;
}
putCache(k, c);
if (ColumnStatistic.UNKNOWN == c) {
return;
return false;
}
TUpdateFollowerStatsCacheRequest updateFollowerStatsCacheRequest = new TUpdateFollowerStatsCacheRequest();
updateFollowerStatsCacheRequest.key = GsonUtils.GSON.toJson(k);
Expand All @@ -234,6 +237,7 @@ public void syncLoadColStats(long tableId, long idxId, String colName) {
}
sendStats(frontend, updateFollowerStatsCacheRequest);
}
return true;
}

@VisibleForTesting
Expand Down
79 changes: 78 additions & 1 deletion regression-test/suites/statistics/analyze_stats.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ suite("test_analyze") {

try {
sql """
SELECT COUNT(*) FROM ${tbl};
SELECT * FROM ${tbl};
"""
} catch (Exception e) {
exception = e
Expand Down Expand Up @@ -959,4 +959,81 @@ PARTITION `p599` VALUES IN (599)
"""
expected_col_stats(col_id_res, 3, 1)

sql """DROP TABLE IF EXISTS `some_complex_type_test`"""

sql """
CREATE TABLE `some_complex_type_test` (
`id` int(11) NULL COMMENT "",
`c_array` ARRAY<int(11)> NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
);
"""

sql """INSERT INTO `some_complex_type_test` VALUES (1, [1,2,3,4,5]);"""
sql """INSERT INTO `some_complex_type_test` VALUES (2, [6,7,8]), (3, []), (4, null);"""

sql """
ANALYZE TABLE `some_complex_type_test` WITH SYNC;

"""

sql """
SELECT COUNT(1) FROM `some_complex_type_test`
"""

sql """DROP TABLE IF EXISTS `analyze_test_with_schema_update`"""

sql """
CREATE TABLE `analyze_test_with_schema_update` (
col1 varchar(11451) not null, col2 int not null, col3 int not null)
DUPLICATE KEY(col1)
DISTRIBUTED BY HASH(col1)
BUCKETS 3
PROPERTIES(
"replication_num"="1"
);
"""

sql """insert into analyze_test_with_schema_update values(1, 2, 3);"""
sql """insert into analyze_test_with_schema_update values(4, 5, 6);"""
sql """insert into analyze_test_with_schema_update values(7, 1, 9);"""
sql """insert into analyze_test_with_schema_update values(3, 8, 2);"""
sql """insert into analyze_test_with_schema_update values(5, 2, 1);"""

sql """
ANALYZE TABLE analyze_test_with_schema_update WITH SYNC
"""

sql """
ALTER TABLE analyze_test_with_schema_update ADD COLUMN tbl_name VARCHAR(256) DEFAULT NULL;
"""

sql """
ANALYZE TABLE analyze_test_with_schema_update WITH SYNC
"""

sql """
SELECT * FROM analyze_test_with_schema_update;
"""

sql """
DROP STATS analyze_test_with_schema_update(col3);
"""

sql """
ANALYZE TABLE analyze_test_with_schema_update WITH SYNC
"""

sql """
SELECT * FROM analyze_test_with_schema_update;
"""


}