From 0579d1ecae271c3ba0137ee0bb36b5ac60506808 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:47:49 +0800 Subject: [PATCH] [opt](Nereids) do not fallback if nereids failed because timeout (#39499) pick from master #39499 since legacy planner will cost more time to plan, fallback will be worse than throw exception directly --- .../exceptions/DoNotFallbackException.java | 27 ++++++++++++ .../jobs/scheduler/SimpleJobScheduler.java | 3 +- .../org/apache/doris/qe/StmtExecutor.java | 29 +++++++----- .../nereids_p0/test_timeout_fallback.groovy | 44 +++++++++++++++++++ 4 files changed, 90 insertions(+), 13 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/DoNotFallbackException.java create mode 100644 regression-test/suites/nereids_p0/test_timeout_fallback.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/DoNotFallbackException.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/DoNotFallbackException.java new file mode 100644 index 00000000000000..b6253f52c6b5df --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/DoNotFallbackException.java @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.exceptions; + +/** + * Exception for can not fall back error in Nereids. + */ +public class DoNotFallbackException extends RuntimeException { + public DoNotFallbackException(String msg) { + super(msg); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/scheduler/SimpleJobScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/scheduler/SimpleJobScheduler.java index ec751bdab2d446..32a82127e6dbe2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/scheduler/SimpleJobScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/scheduler/SimpleJobScheduler.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.jobs.scheduler; import org.apache.doris.nereids.CascadesContext; +import org.apache.doris.nereids.exceptions.DoNotFallbackException; import org.apache.doris.nereids.jobs.Job; import org.apache.doris.qe.SessionVariable; @@ -36,7 +37,7 @@ public void executeJobPool(ScheduleContext scheduleContext) { if (sessionVariable.enableNereidsTimeout && context.getStatementContext().getStopwatch().elapsed(TimeUnit.MILLISECONDS) > sessionVariable.nereidsTimeoutSecond * 1000L) { - throw new RuntimeException( + throw new DoNotFallbackException( "Nereids cost too much time ( > " + sessionVariable.nereidsTimeoutSecond + "s )"); } Job job = pool.pop(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index 9f349891ce38b1..4f79b7b4ee9f64 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -136,6 +136,7 @@ import org.apache.doris.nereids.NereidsPlanner; import org.apache.doris.nereids.PlanProcess; import org.apache.doris.nereids.StatementContext; +import org.apache.doris.nereids.exceptions.DoNotFallbackException; import org.apache.doris.nereids.exceptions.MustFallbackException; import org.apache.doris.nereids.exceptions.ParseException; import org.apache.doris.nereids.glue.LogicalPlanAdapter; @@ -520,7 +521,10 @@ public void execute() throws Exception { execute(queryId); } - public boolean notAllowFallback() { + public boolean notAllowFallback(NereidsException e) { + if (e.getException() instanceof DoNotFallbackException) { + return true; + } if (parsedStmt instanceof LogicalPlanAdapter) { LogicalPlan logicalPlan = ((LogicalPlanAdapter) parsedStmt).getLogicalPlan(); return logicalPlan instanceof NotAllowFallback; @@ -545,12 +549,12 @@ public void execute(TUniqueId queryId) throws Exception { } // try to fall back to legacy planner if (LOG.isDebugEnabled()) { - LOG.debug("nereids cannot process statement\n" + originStmt.originStmt - + "\n because of " + e.getMessage(), e); + LOG.debug("nereids cannot process statement\n{}\n because of {}", + originStmt.originStmt, e.getMessage(), e); } - if (notAllowFallback()) { + if (e instanceof NereidsException && notAllowFallback((NereidsException) e)) { LOG.warn("Analyze failed. {}", context.getQueryIdentifier(), e); - throw ((NereidsException) e).getException(); + throw new AnalysisException(e.getMessage()); } // FIXME: Force fallback for: // 1. group commit because nereids does not support it (see the following `isGroupCommit` variable) @@ -705,7 +709,7 @@ private void executeByNereids(TUniqueId queryId) throws Exception { syncJournalIfNeeded(); try { ((Command) logicalPlan).run(context, this); - } catch (MustFallbackException e) { + } catch (MustFallbackException | DoNotFallbackException e) { if (LOG.isDebugEnabled()) { LOG.debug("Command({}) process failed.", originStmt.originStmt, e); } @@ -751,10 +755,11 @@ private void executeByNereids(TUniqueId queryId) throws Exception { try { planner.plan(parsedStmt, context.getSessionVariable().toThrift()); checkBlockRules(); + } catch (MustFallbackException | DoNotFallbackException e) { + LOG.warn("Nereids plan query failed:\n{}", originStmt.originStmt, e); + throw new NereidsException("Command(" + originStmt.originStmt + ") process failed.", e); } catch (Exception e) { - if (LOG.isDebugEnabled()) { - LOG.debug("Nereids plan query failed:\n{}", originStmt.originStmt); - } + LOG.warn("Nereids plan query failed:\n{}", originStmt.originStmt, e); throw new NereidsException(new AnalysisException(e.getMessage(), e)); } profile.getSummaryProfile().setQueryPlanFinishTime(); @@ -3286,10 +3291,10 @@ public HttpStreamParams generateHttpStreamPlan(TUniqueId queryId) throws Excepti } // try to fall back to legacy planner if (LOG.isDebugEnabled()) { - LOG.debug("nereids cannot process statement\n" + originStmt.originStmt - + "\n because of " + e.getMessage(), e); + LOG.debug("nereids cannot process statement\n{}\n because of {}", + originStmt.originStmt, e.getMessage(), e); } - if (notAllowFallback()) { + if (e instanceof NereidsException && notAllowFallback((NereidsException) e)) { LOG.warn("Analyze failed. {}", context.getQueryIdentifier(), e); throw ((NereidsException) e).getException(); } diff --git a/regression-test/suites/nereids_p0/test_timeout_fallback.groovy b/regression-test/suites/nereids_p0/test_timeout_fallback.groovy new file mode 100644 index 00000000000000..084fe9c8a197ef --- /dev/null +++ b/regression-test/suites/nereids_p0/test_timeout_fallback.groovy @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_timeout_fallback") { + sql "set enable_nereids_planner=true" + sql "set enable_fallback_to_original_planner=true" + sql "set enable_nereids_timeout=true" + sql "set nereids_timeout_second=-1" + + test { + sql "select 1" + exception "Nereids cost too much time" + } + + test { + sql "explain select 1" + exception "Nereids cost too much time" + } + + sql "drop table if exists test_timeout_fallback" + + sql """ + create table test_timeout_fallback (id int) distributed by hash(id) properties ('replication_num'='1') + """ + + test { + sql "insert into test_timeout_fallback values (1)" + exception "Nereids cost too much time" + } +}