From d40f607f90b17eb45eb7abe9216a8cd94f508d26 Mon Sep 17 00:00:00 2001 From: Jibing Li Date: Thu, 17 Aug 2023 18:57:25 +0800 Subject: [PATCH] Load the cache for external table row count while init table. --- .../doris/analysis/ShowTableStatsStmt.java | 15 ++++++++ .../doris/catalog/external/ExternalTable.java | 2 ++ .../catalog/external/HMSExternalTable.java | 14 +++++--- .../translator/PhysicalPlanTranslator.java | 4 ++- .../org/apache/doris/qe/ShowExecutor.java | 11 +++++- .../doris/statistics/AnalysisManager.java | 16 +++++---- .../doris/statistics/HMSAnalysisTask.java | 6 +++- .../hive/test_hive_statistic.groovy | 4 +-- .../hive/test_hive_statistic_cache.groovy | 36 ++++++++++++++++++- 9 files changed, 91 insertions(+), 17 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index af14be37b5430c..c5e9e211e4f55d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -150,6 +150,21 @@ public ShowResultSet constructResultSet(TableStats tableStatistic) { return new ShowResultSet(getMetaData(), result); } + public ShowResultSet constructResultSet(long rowCount) { + List> result = Lists.newArrayList(); + List row = Lists.newArrayList(); + row.add(""); + row.add(""); + row.add(String.valueOf(rowCount)); + row.add(""); + row.add(""); + row.add(""); + row.add(""); + row.add(""); + result.add(row); + return new ShowResultSet(getMetaData(), result); + } + public boolean isCached() { return cached; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java index f4c76cda7a98a0..0f9ec3f56433c7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java @@ -72,6 +72,7 @@ public class ExternalTable implements TableIf, Writable, GsonPostProcessable { @SerializedName(value = "lastUpdateTime") protected long lastUpdateTime; + protected long dbId; protected boolean objectCreated; protected ExternalCatalog catalog; protected ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(true); @@ -113,6 +114,7 @@ protected void makeSureInitialized() { try { // getDbOrAnalysisException will call makeSureInitialized in ExternalCatalog. ExternalDatabase db = catalog.getDbOrAnalysisException(dbName); + dbId = db.getId(); db.makeSureInitialized(); } catch (AnalysisException e) { Util.logAndThrowRuntimeException(LOG, String.format("Exception to get db %s", dbName), e); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java index e75b9859b84095..a4c19ecc453491 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java @@ -149,6 +149,7 @@ protected synchronized void makeSureInitialized() { } } objectCreated = true; + estimatedRowCount = getRowCountFromExternalSource(); } } @@ -272,6 +273,15 @@ public long getUpdateTime() { @Override public long getRowCount() { makeSureInitialized(); + long rowCount = getRowCountFromExternalSource(); + if (rowCount == -1) { + LOG.debug("Will estimate row count from file list."); + rowCount = StatisticsUtil.getRowCountFromFileList(this); + } + return rowCount; + } + + private long getRowCountFromExternalSource() { long rowCount; switch (dlaType) { case HIVE: @@ -284,10 +294,6 @@ public long getRowCount() { LOG.warn("getRowCount for dlaType {} is not supported.", dlaType); rowCount = -1; } - if (rowCount == -1) { - LOG.debug("Will estimate row count from file list."); - rowCount = StatisticsUtil.getRowCountFromFileList(this); - } return rowCount; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 2a5ba6474e52bf..b2cd73954bebc0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -437,7 +437,9 @@ public PlanFragment visitPhysicalFileScan(PhysicalFileScan fileScan, PlanTransla TableRef ref = new TableRef(tableName, null, null); BaseTableRef tableRef = new BaseTableRef(ref, table, tableName); tupleDescriptor.setRef(tableRef); - + if (fileScan.getStats() != null) { + scanNode.setCardinality((long) fileScan.getStats().getRowCount()); + } Utils.execWithUncheckedException(scanNode::init); context.addScanNode(scanNode); ScanNode finalScanNode = scanNode; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 69c3fb69cfecd9..f505089cd756c5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -2451,7 +2451,16 @@ private void handleShowTableStats() { ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt; TableIf tableIf = showTableStatsStmt.getTable(); TableStats tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableIf.getId()); - resultSet = showTableStatsStmt.constructResultSet(tableStats); + /* + HMSExternalTable table will fetch row count from HMS + or estimate with file size and schema if it's not analyzed. + tableStats == null means it's not analyzed, in this case show the estimated row count. + */ + if (tableStats == null && tableIf instanceof HMSExternalTable) { + resultSet = showTableStatsStmt.constructResultSet(tableIf.estimatedRowCount()); + } else { + resultSet = showTableStatsStmt.constructResultSet(tableStats); + } } private void handleShowColumnStats() throws AnalysisException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 62d27a17c92dcd..5fbfb56829548d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -32,6 +32,7 @@ import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.View; +import org.apache.doris.catalog.external.ExternalTable; import org.apache.doris.catalog.external.HMSExternalTable; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; @@ -285,9 +286,7 @@ public List buildAnalysisInfosForDB(DatabaseIf db, Analyz // columnNames null means to add all visitable columns. // Will get all the visible columns in analyzeTblStmt.check() AnalyzeTblStmt analyzeTblStmt = new AnalyzeTblStmt(analyzeProperties, tableName, - table.getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())).map( - Column::getName).collect( - Collectors.toList()), db.getId(), table); + null, db.getId(), table); try { analyzeTblStmt.check(); } catch (AnalysisException analysisException) { @@ -334,11 +333,10 @@ public AnalysisInfo buildAndAssignJob(AnalyzeTblStmt stmt) throws DdlException { boolean isSync = stmt.isSync(); Map analysisTaskInfos = new HashMap<>(); + createTaskForEachColumns(jobInfo, analysisTaskInfos, isSync); if (stmt.isAllColumns() && StatisticsUtil.isExternalTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName)) { - createTaskForExternalTable(jobInfo, analysisTaskInfos, isSync); - } else { - createTaskForEachColumns(jobInfo, analysisTaskInfos, isSync); + createTableLevelTaskForExternalTable(jobInfo, analysisTaskInfos, isSync); } if (isSync) { syncExecute(analysisTaskInfos.values()); @@ -583,7 +581,7 @@ public void logCreateAnalysisJob(AnalysisInfo analysisJob) { } @VisibleForTesting - public void createTaskForExternalTable(AnalysisInfo jobInfo, + public void createTableLevelTaskForExternalTable(AnalysisInfo jobInfo, Map analysisTasks, boolean isSync) throws DdlException { @@ -616,6 +614,10 @@ public void updateTaskStatus(AnalysisInfo info, AnalysisState taskState, String public void updateTableStats(AnalysisInfo jobInfo) { TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName); + // External Table update table stats after table level task finished. + if (tbl instanceof ExternalTable) { + return; + } // TODO: set updatedRows to 0, when loadedRows of transaction info is ready. updateTableStatsStatus(new TableStats(tbl.getId(), tbl.estimatedRowCount(), jobInfo)); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java index e741ee1d2caca8..512aa9982ffed5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java @@ -123,7 +123,7 @@ private void getTableStats() throws Exception { String rowCount = columnResult.get(0).get(0); Env.getCurrentEnv().getAnalysisManager() .updateTableStatsStatus( - new TableStats(table.getId(), Long.parseLong(rowCount), null)); + new TableStats(table.getId(), Long.parseLong(rowCount), info)); } /** @@ -269,6 +269,10 @@ private void setParameterData(Map parameters, Map