From 103aa272e21a845a11f943506e3b02dae64fd4c6 Mon Sep 17 00:00:00 2001
From: Jibing Li
Date: Fri, 1 Sep 2023 11:47:31 +0800
Subject: [PATCH] Add property to support manually use auto analyzer to analyze db.

---
 .../java/org/apache/doris/catalog/Env.java    |  4 +
 .../doris/statistics/AnalysisManager.java     |  5 ++
 .../statistics/StatisticsAutoCollector.java   | 35 ++++---
 .../hive/test_hive_statistic_auto.groovy      | 87 +++++++++++++++++++
 4 files changed, 119 insertions(+), 12 deletions(-)
 create mode 100644 regression-test/suites/external_table_p2/hive/test_hive_statistic_auto.groovy

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
index 8f9ca541a879ab..0e2243036cf261 100755
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
@@ -5601,4 +5601,8 @@ public void replayAutoIncrementIdUpdateLog(AutoIncrementIdUpdateLog log) throws
     public ColumnIdFlushDaemon getColumnIdFlusher() {
         return columnIdFlusher;
     }
+
+    public StatisticsAutoCollector getStatisticsAutoCollector() {
+        return statisticsAutoCollector;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 853e9b339314b5..fd2d844ce13f9d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -266,6 +266,11 @@ public void createAnalyze(AnalyzeStmt analyzeStmt, boolean proxy) throws DdlExce
 
     public void createAnalysisJobs(AnalyzeDBStmt analyzeDBStmt, boolean proxy) throws DdlException {
         DatabaseIf db = analyzeDBStmt.getDb();
+        // Use the auto analyzer only when the "use.auto.analyzer" property is set to true.
+        if (Boolean.parseBoolean(analyzeDBStmt.getAnalyzeProperties().getProperties().get("use.auto.analyzer"))) {
+            Env.getCurrentEnv().getStatisticsAutoCollector().analyzeDb(db);
+            return;
+        }
         List analysisInfos = buildAnalysisInfosForDB(db, analyzeDBStmt.getAnalyzeProperties());
         if (!analyzeDBStmt.isSync()) {
             sendJobId(analysisInfos, proxy);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index c7310fc0e82873..e17db99f34f8d0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -77,18 +77,29 @@ private void analyzeAll() {
                 if (StatisticConstants.STATISTICS_DB_BLACK_LIST.contains(databaseIf.getFullName())) {
                     continue;
                 }
-                List<AnalysisInfo> analysisInfos = constructAnalysisInfo(databaseIf);
-                for (AnalysisInfo analysisInfo : analysisInfos) {
-                    analysisInfo = getReAnalyzeRequiredPart(analysisInfo);
-                    if (analysisInfo == null) {
-                        continue;
-                    }
-                    try {
-                        createSystemAnalysisJob(analysisInfo);
-                    } catch (Exception e) {
-                        LOG.warn("Failed to create analysis job", e);
-                    }
-                }
+                analyzeDb(databaseIf);
+            }
+        }
+    }
+
+    /**
+     * Builds the analysis infos for the given database and creates a system analysis job for each
+     * one that {@code getReAnalyzeRequiredPart} does not reduce to {@code null}. A failure to create
+     * a job is logged and does not stop the remaining infos from being processed.
+     *
+     * @param databaseIf database whose analysis infos are constructed and submitted
+     */
+    public void analyzeDb(DatabaseIf databaseIf) {
+        List<AnalysisInfo> analysisInfos = constructAnalysisInfo(databaseIf);
+        for (AnalysisInfo analysisInfo : analysisInfos) {
+            analysisInfo = getReAnalyzeRequiredPart(analysisInfo);
+            if (analysisInfo == null) {
+                continue;
+            }
+            try {
+                createSystemAnalysisJob(analysisInfo);
+            } catch (Exception e) {
+                LOG.warn("Failed to create analysis job", e);
             }
         }
     }
diff --git a/regression-test/suites/external_table_p2/hive/test_hive_statistic_auto.groovy b/regression-test/suites/external_table_p2/hive/test_hive_statistic_auto.groovy
new file mode 100644
index 00000000000000..f766069346e1e8
--- /dev/null
+++ b/regression-test/suites/external_table_p2/hive/test_hive_statistic_auto.groovy
@@ -0,0 +1,87 @@
+// 
Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_statistic_auto", "p2,external,hive,external_remote,external_remote_hive") {
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "test_hive_statistic_auto"
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+
+        // Test analyze table without init. Stats may not be visible right away, so poll for up to 10 seconds.
+        sql """analyze database ${catalog_name}.statistics PROPERTIES("use.auto.analyzer"="true")"""
+        sql """use ${catalog_name}.statistics"""
+        boolean statsVerified = false // becomes true once all three columns have been checked
+        for (int i = 0; i < 10 && !statsVerified; i++) {
+            Thread.sleep(1000)
+            def result = sql """show column stats `statistics` (lo_quantity)"""
+            if (result.size() <= 0) {
+                continue;
+            }
+            assertTrue(result.size() == 1)
+            assertTrue(result[0][0] == "lo_quantity")
+            assertTrue(result[0][1] == "100.0")
+            assertTrue(result[0][2] == "46.0")
+            assertTrue(result[0][3] == "0.0")
+            assertTrue(result[0][4] == "404.0")
+            assertTrue(result[0][5] == "4.0")
+            assertTrue(result[0][6] == "1")
+            assertTrue(result[0][7] == "50")
+
+            result = sql """show column stats `statistics` (lo_orderkey)"""
+            if (result.size() <= 0) {
+                continue;
+            }
+            assertTrue(result.size() == 1)
+            assertTrue(result[0][0] == "lo_orderkey")
+            assertTrue(result[0][1] == "100.0")
+            assertTrue(result[0][2] == "26.0")
+            assertTrue(result[0][3] == "0.0")
+            assertTrue(result[0][4] == "404.0")
+            assertTrue(result[0][5] == "4.0")
+            assertTrue(result[0][6] == "1")
+            assertTrue(result[0][7] == "98")
+
+            result = sql """show column stats `statistics` (lo_linenumber)"""
+            if (result.size() <= 0) {
+                continue;
+            }
+            assertTrue(result.size() == 1)
+            assertTrue(result[0][0] == "lo_linenumber")
+            assertTrue(result[0][1] == "100.0")
+            assertTrue(result[0][2] == "7.0")
+            assertTrue(result[0][3] == "0.0")
+            assertTrue(result[0][4] == "404.0")
+            assertTrue(result[0][5] == "4.0")
+            assertTrue(result[0][6] == "1")
+            assertTrue(result[0][7] == "7")
+            statsVerified = true // all three columns checked; the loop condition ends the polling
+        }
+        assertTrue(statsVerified) // fail instead of passing silently when the stats never showed up
+        sql """drop catalog ${catalog_name}"""
+    }
+}
+