From 99930badd2f6c1d6d0bf37ef256d896f8a9d0e14 Mon Sep 17 00:00:00 2001 From: lamb Date: Thu, 16 Nov 2023 01:33:26 +0800 Subject: [PATCH 1/3] save --- be/src/exec/olap_scanner.cpp | 3 +- be/src/olap/schema_change.cpp | 2 +- be/src/olap/tablet.cpp | 19 +++++--- be/src/olap/tablet.h | 7 ++- be/src/runtime/runtime_state.h | 4 ++ be/src/vec/exec/scan/new_olap_scanner.cpp | 3 +- be/test/olap/tablet_test.cpp | 4 +- docs/en/docs/admin-manual/config/fe-config.md | 14 ------ docs/en/docs/advanced/variables.md | 4 ++ .../docs/admin-manual/config/fe-config.md | 18 -------- docs/zh-CN/docs/advanced/variables.md | 4 ++ .../java/org/apache/doris/common/Config.java | 14 ------ .../org/apache/doris/catalog/Replica.java | 18 ++++++++ .../java/org/apache/doris/catalog/Tablet.java | 6 +-- .../apache/doris/planner/OlapScanNode.java | 33 ++++++++------ .../org/apache/doris/qe/SessionVariable.java | 17 ++++++++ gensrc/thrift/PaloInternalService.thrift | 3 ++ .../test_skip_missing_version.groovy | 43 +++++++++++++++++++ 18 files changed, 141 insertions(+), 75 deletions(-) create mode 100644 regression-test/suites/query_p0/session_variable/test_skip_missing_version.groovy diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index 72769f77694976..1ab514194c81f4 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -104,7 +104,8 @@ Status OlapScanner::prepare( // the rowsets maybe compacted when the last olap scanner starts Version rd_version(0, _version); Status acquire_reader_st = - _tablet->capture_rs_readers(rd_version, &_tablet_reader_params.rs_readers); + _tablet->capture_rs_readers(rd_version, &_tablet_reader_params.rs_readers, + _runtime_state->skip_missing_version()); if (!acquire_reader_st.ok()) { LOG(WARNING) << "fail to init reader.res=" << acquire_reader_st; std::stringstream ss; diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index b22ba4f32db95a..1cc71662f24a8a 100644 --- a/be/src/olap/schema_change.cpp 
+++ b/be/src/olap/schema_change.cpp @@ -2138,7 +2138,7 @@ Status SchemaChangeHandler::_get_versions_to_be_changed( *max_rowset = rowset; RETURN_NOT_OK(base_tablet->capture_consistent_versions(Version(0, rowset->version().second), - versions_to_be_changed)); + versions_to_be_changed, false, false)); return Status::OK(); } diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 98ef536d9f4602..2f4a9c1d6ad69f 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -581,7 +581,7 @@ void Tablet::delete_expired_stale_rowset() { Version test_version = Version(0, lastest_delta->end_version()); stale_version_path_map[*path_id_iter] = version_path; - Status status = capture_consistent_versions(test_version, nullptr); + Status status = capture_consistent_versions(test_version, nullptr, false, false); // 1. When there is no consistent versions, we must reconstruct the tracker. if (!status.ok()) { // 2. fetch missing version after delete @@ -696,7 +696,8 @@ bool Tablet::_reconstruct_version_tracker_if_necessary() { } Status Tablet::capture_consistent_versions(const Version& spec_version, - std::vector* version_path, bool quiet) const { + std::vector* version_path, + bool skip_missing_version, bool quiet) const { Status status = _timestamped_version_tracker.capture_consistent_versions(spec_version, version_path); if (!status.ok() && !quiet) { @@ -715,6 +716,10 @@ Status Tablet::capture_consistent_versions(const Version& spec_version, LOG(WARNING) << "status:" << status << ", tablet:" << full_name() << ", missed version for version:" << spec_version; _print_missed_versions(missed_versions); + if (skip_missing_version) { + LOG(WARNING) << "force skipping missing version for tablet:" << full_name(); + return Status::OK(); + } } } } @@ -723,7 +728,7 @@ Status Tablet::capture_consistent_versions(const Version& spec_version, Status Tablet::check_version_integrity(const Version& version, bool quiet) { std::shared_lock rdlock(_meta_lock); - return 
capture_consistent_versions(version, nullptr, quiet); + return capture_consistent_versions(version, nullptr, false, quiet); } // If any rowset contains the specific version, it means the version already exist @@ -747,7 +752,7 @@ void Tablet::acquire_version_and_rowsets( Status Tablet::capture_consistent_rowsets(const Version& spec_version, std::vector* rowsets) const { std::vector version_path; - RETURN_NOT_OK(capture_consistent_versions(spec_version, &version_path)); + RETURN_NOT_OK(capture_consistent_versions(spec_version, &version_path, false, false)); RETURN_NOT_OK(_capture_consistent_rowsets_unlocked(version_path, rowsets)); return Status::OK(); } @@ -784,9 +789,11 @@ Status Tablet::_capture_consistent_rowsets_unlocked(const std::vector& } Status Tablet::capture_rs_readers(const Version& spec_version, - std::vector* rs_readers) const { + std::vector* rs_readers, + bool skip_missing_version) const { std::vector version_path; - RETURN_NOT_OK(capture_consistent_versions(spec_version, &version_path)); + RETURN_NOT_OK( + capture_consistent_versions(spec_version, &version_path, skip_missing_version, false)); RETURN_NOT_OK(capture_rs_readers(version_path, rs_readers)); return Status::OK(); } diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index ce7bb1cc44b3e6..816b50c188da38 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -148,9 +148,10 @@ class Tablet : public BaseTablet { // Given spec_version, find a continuous version path and store it in version_path. // If quiet is true, then only "does this path exist" is returned. + // If skip_missing_version is true, return ok even there are missing versions. 
Status capture_consistent_versions(const Version& spec_version, std::vector* version_path, - bool quiet = false) const; + bool skip_missing_version, bool quiet) const; // if quiet is true, no error log will be printed if there are missing versions Status check_version_integrity(const Version& version, bool quiet = false); bool check_version_exist(const Version& version) const; @@ -159,8 +160,10 @@ class Tablet : public BaseTablet { Status capture_consistent_rowsets(const Version& spec_version, std::vector* rowsets) const; + // If skip_missing_version is true, skip versions if they are missing. Status capture_rs_readers(const Version& spec_version, - std::vector* rs_readers) const; + std::vector* rs_readers, + bool skip_missing_version) const; Status capture_rs_readers(const std::vector& version_path, std::vector* rs_readers) const; diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index dc4c5f97d1da9d..21cb70e73b3d7a 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -362,6 +362,10 @@ class RuntimeState { return _query_options.__isset.skip_delete_bitmap && _query_options.skip_delete_bitmap; } + bool skip_missing_version() const { + return _query_options.__isset.skip_missing_version && _query_options.skip_missing_version; + } + int partitioned_hash_join_rows_threshold() const { if (!_query_options.__isset.partitioned_hash_join_rows_threshold) { return 0; diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index a702b2b6e567c7..0d59f122351b11 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -91,7 +91,8 @@ Status NewOlapScanner::prepare(const TPaloScanRange& scan_range, // the rowsets maybe compacted when the last olap scanner starts Version rd_version(0, _version); Status acquire_reader_st = - _tablet->capture_rs_readers(rd_version, &_tablet_reader_params.rs_readers); + 
_tablet->capture_rs_readers(rd_version, &_tablet_reader_params.rs_readers, + _state->skip_missing_version()); if (!acquire_reader_st.ok()) { LOG(WARNING) << "fail to init reader.res=" << acquire_reader_st; std::stringstream ss; diff --git a/be/test/olap/tablet_test.cpp b/be/test/olap/tablet_test.cpp index f97fd6833c0b3b..496d67a9e5201c 100644 --- a/be/test/olap/tablet_test.cpp +++ b/be/test/olap/tablet_test.cpp @@ -300,12 +300,12 @@ TEST_F(TestTablet, pad_rowset) { Version version(5, 5); std::vector readers; - ASSERT_FALSE(_tablet->capture_rs_readers(version, &readers).ok()); + ASSERT_FALSE(_tablet->capture_rs_readers(version, &readers, false).ok()); readers.clear(); PadRowsetAction action; action._pad_rowset(_tablet, version); - ASSERT_TRUE(_tablet->capture_rs_readers(version, &readers).ok()); + ASSERT_TRUE(_tablet->capture_rs_readers(version, &readers, false).ok()); } TEST_F(TestTablet, cooldown_policy) { diff --git a/docs/en/docs/admin-manual/config/fe-config.md b/docs/en/docs/admin-manual/config/fe-config.md index 1e69299a1df1b5..9d557772b6488e 100644 --- a/docs/en/docs/admin-manual/config/fe-config.md +++ b/docs/en/docs/admin-manual/config/fe-config.md @@ -1805,20 +1805,6 @@ In some very special circumstances, such as code bugs, or human misoperation, et Set to true so that Doris will automatically use blank replicas to fill tablets which all replicas have been damaged or missing -#### `recover_with_skip_missing_version` - -Default:disable - -IsMutable:true - -MasterOnly:true - -In some scenarios, there is an unrecoverable metadata problem in the cluster, and the visibleVersion of the data does not match be. In this case, it is still necessary to restore the remaining data (which may cause problems with the correctness of the data). This configuration is the same as` recover_with_empty_tablet` should only be used in emergency situations -This configuration has three values: -* disable : If an exception occurs, an error will be reported normally. 
-* ignore_version: ignore the visibleVersion information recorded in fe partition, use replica version -* ignore_all: In addition to ignore_version, when encountering no queryable replica, skip it directly instead of throwing an exception - #### `min_clone_task_timeout_sec` `And max_clone_task_timeout_sec` Default:Minimum 3 minutes, maximum two hours diff --git a/docs/en/docs/advanced/variables.md b/docs/en/docs/advanced/variables.md index 7152fa49bb7398..23880635e6e18d 100644 --- a/docs/en/docs/advanced/variables.md +++ b/docs/en/docs/advanced/variables.md @@ -574,6 +574,10 @@ Translated with www.DeepL.com/Translator (free version) For debugging purpose. In Unique Key MoW table, in case of problems of reading data, setting value to `true` will also read deleted data. +* `skip_missing_version` + + In some scenarios, all replicas of a tablet have missing versions, and the tablet is unable to recover. This variable controls the behavior of queries in that case. When it is set to `true`, the query will ignore the visible version recorded in the FE partition and use the replica version instead. If the replica on the BE has missing versions, the query will skip those missing versions and only return the data of the existing versions. In addition, the query will always try to select the replica with the highest lastSuccessVersion among all surviving BE replicas, so as to recover as much data as possible. You should only enable it in the emergency scenarios mentioned above, and only for temporary recovery queries. Note that this variable conflicts with the `use_fix_replica` variable: when `use_fix_replica` is not -1, this variable will not take effect. + * `default_password_lifetime` Default password expiration time. The default value is 0, which means no expiration. The unit is days. This parameter is only enabled if the user's password expiration property has a value of DEFAULT. 
like: diff --git a/docs/zh-CN/docs/admin-manual/config/fe-config.md b/docs/zh-CN/docs/admin-manual/config/fe-config.md index d78ae3804c1ca3..ec4d759be35ac4 100644 --- a/docs/zh-CN/docs/admin-manual/config/fe-config.md +++ b/docs/zh-CN/docs/admin-manual/config/fe-config.md @@ -1805,24 +1805,6 @@ show data (其他用法:HELP SHOW DATA) 在这种情况下,您可以将此配置设置为 true。 系统会将损坏的 tablet 替换为空 tablet,以确保查询可以执行。 (但此时数据已经丢失,所以查询结果可能不准确) -#### `recover_with_skip_missing_version` - -默认值:disable - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -有些场景下集群出现了不可恢复的元数据问题,数据已的visibleversion 已经和be 不匹配, - -这种情况下仍然需要恢复剩余的数据(可能能会导致数据的正确性有问题),这个配置同`recover_with_empty_tablet` 一样只能在紧急情况下使用 - -这个配置有三个值: - - * disable :出现异常会正常报错。 - * ignore_version: 忽略 fe partition 中记录的visibleVersion 信息, 使用replica version - * ignore_all: 除了ignore_version, 在遇到找不到可查询的replica 时,直接跳过而不是抛出异常 - #### `min_clone_task_timeout_sec` 和 `max_clone_task_timeout_sec` 默认值:最小3分钟,最大两小时 diff --git a/docs/zh-CN/docs/advanced/variables.md b/docs/zh-CN/docs/advanced/variables.md index 2309389367ab2f..a002749107a4e2 100644 --- a/docs/zh-CN/docs/advanced/variables.md +++ b/docs/zh-CN/docs/advanced/variables.md @@ -561,6 +561,10 @@ try (Connection conn = DriverManager.getConnection("jdbc:mysql://127.0.0.1:9030/ 用于调试目的。在Unique Key MoW表中,当发现读取表的数据结果有误的时候,把此变量的值设置为`true`,将会把被delete bitmap标记删除的数据当成正常数据读取。 +* `skip_missing_version` + + 有些极端场景下,表的 Tablet 下的所有副本都有版本缺失,使得这些 Tablet 没有办法被恢复,导致整张表都不能查询。这个变量可以用来控制查询的行为,当设置为`true`时,查询会忽略 FE partition 中记录的 visibleVersion,使用 replica version。如果 BE 上的 Replica 有缺失的版本,则查询会直接跳过这些缺失的版本,只返回仍存在版本的数据。此外,查询将会总是选择所有存活的 BE 中所有 Replica 里 lastSuccessVersion 最大的那一个,这样可以尽可能的恢复更多的数据。这个变量应该只在上述紧急情况下才被设置为`true`,仅用于临时让表恢复查询。注意,此变量与 use_fix_replica 变量冲突,当 use_fix_replica 变量不等于 -1 时,此变量会不起作用。 + * `default_password_lifetime` 默认的密码过期时间。默认值为 0,即表示不过期。单位为天。该参数只有当用户的密码过期属性为 DEFAULT 值时,才启用。如: diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 
f0953957f23439..722c23036369f6 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1406,20 +1406,6 @@ public class Config extends ConfigBase { @ConfField(mutable = true, masterOnly = true) public static boolean recover_with_empty_tablet = false; - /** - * In some scenarios, there is an unrecoverable metadata problem in the cluster, - * and the visibleVersion of the data does not match be. In this case, it is still - * necessary to restore the remaining data (which may cause problems with the correctness of the data). - * This configuration is the same as` recover_with_empty_tablet` should only be used in emergency situations - * This configuration has three values: - * disable : If an exception occurs, an error will be reported normally. - * ignore_version: ignore the visibleVersion information recorded in fe partition, use replica version - * ignore_all: In addition to ignore_version, when encountering no queryable replica, - * skip it directly instead of throwing an exception - */ - @ConfField(mutable = true, masterOnly = true) - public static String recover_with_skip_missing_version = "disable"; - /** * Whether to add a delete sign column when create unique table */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Replica.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Replica.java index da845d2cd67f05..39ea83214ab8dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Replica.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Replica.java @@ -35,6 +35,8 @@ public class Replica implements Writable { private static final Logger LOG = LogManager.getLogger(Replica.class); public static final VersionComparator VERSION_DESC_COMPARATOR = new VersionComparator(); + public static final LastSuccessVersionComparator LAST_SUCCESS_VERSION_COMPARATOR = + new LastSuccessVersionComparator(); public static final IdComparator ID_COMPARATOR 
= new IdComparator(); public enum ReplicaState { @@ -528,6 +530,22 @@ public int compare(T replica1, T replica2) { } } + private static class LastSuccessVersionComparator implements Comparator { + public LastSuccessVersionComparator() { + } + + @Override + public int compare(T replica1, T replica2) { + if (replica1.getLastSuccessVersion() < replica2.getLastSuccessVersion()) { + return 1; + } else if (replica1.getLastSuccessVersion() == replica2.getLastSuccessVersion()) { + return 0; + } else { + return -1; + } + } + } + private static class IdComparator implements Comparator { public IdComparator() { } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java index 1ba726cffc9cc2..0e60a9519745ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java @@ -214,7 +214,7 @@ public Multimap getNormalReplicaBackendPathMap() { } // for query - public List getQueryableReplicas(long visibleVersion) { + public List getQueryableReplicas(long visibleVersion, boolean allowFailedVersion) { List allQueryableReplica = Lists.newArrayListWithCapacity(replicas.size()); List auxiliaryReplica = Lists.newArrayListWithCapacity(replicas.size()); for (Replica replica : replicas) { @@ -222,8 +222,8 @@ public List getQueryableReplicas(long visibleVersion) { continue; } - // Skip the missing version replica - if (replica.getLastFailedVersion() > 0) { + // Skip the missing version replica. 
+ if (replica.getLastFailedVersion() > 0 && !allowFailedVersion) { continue; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index 05fcf0bc6e7ddf..fc0a4210c39c64 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -627,14 +627,19 @@ private void addScanRangeLocations(Partition partition, String visibleVersionStr = String.valueOf(visibleVersion); Set allowedTags = Sets.newHashSet(); + int useFixReplica = -1; boolean needCheckTags = false; + boolean skipMissingVersion = false; if (ConnectContext.get() != null) { allowedTags = ConnectContext.get().getResourceTags(); needCheckTags = ConnectContext.get().isResourceTagsSet(); + useFixReplica = ConnectContext.get().getSessionVariable().useFixReplica; + // if use_fix_replica is set to true, set skip_missing_version to false + skipMissingVersion = useFixReplica == -1 && ConnectContext.get().getSessionVariable().skipMissingVersion; } for (Tablet tablet : tablets) { long tabletId = tablet.getId(); - if (!Config.recover_with_skip_missing_version.equalsIgnoreCase("disable")) { + if (skipMissingVersion) { long tabletVersion = -1L; for (Replica replica : tablet.getReplicas()) { if (replica.getVersion() > tabletVersion) { @@ -657,7 +662,7 @@ private void addScanRangeLocations(Partition partition, paloRange.setTabletId(tabletId); // random shuffle List && only collect one copy - List replicas = tablet.getQueryableReplicas(visibleVersion); + List replicas = tablet.getQueryableReplicas(visibleVersion, skipMissingVersion); if (replicas.isEmpty()) { LOG.error("no queryable replica found in tablet {}. 
visible version {}", tabletId, visibleVersion); @@ -669,12 +674,13 @@ private void addScanRangeLocations(Partition partition, throw new UserException("Failed to get scan range, no queryable replica found in tablet: " + tabletId); } - int useFixReplica = -1; - if (ConnectContext.get() != null) { - useFixReplica = ConnectContext.get().getSessionVariable().useFixReplica; - } if (useFixReplica == -1) { - Collections.shuffle(replicas); + if (skipMissingVersion) { + // sort by replica's last success version, higher success version in the front. + replicas.sort(Replica.LAST_SUCCESS_VERSION_COMPARATOR); + } else { + Collections.shuffle(replicas); + } } else { LOG.debug("use fix replica, value: {}, replica num: {}", useFixReplica, replicas.size()); // sort by replica id @@ -721,14 +727,15 @@ private void addScanRangeLocations(Partition partition, collectedStat = true; } scanBackendIds.add(backend.getId()); + // For skipping missing version of tablet, we only select the backend with the highest last + // success version replica to save as much data as possible. + if (!tabletIsNull && skipMissingVersion) { + break; + } } if (tabletIsNull) { - if (Config.recover_with_skip_missing_version.equalsIgnoreCase("ignore_all")) { - continue; - } else { - throw new UserException(tabletId + " have no queryable replicas. err: " - + Joiner.on(", ").join(errs)); - } + throw new UserException(tabletId + " have no queryable replicas. 
err: " + + Joiner.on(", ").join(errs)); } TScanRange scanRange = new TScanRange(); scanRange.setPaloScanRange(paloRange); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index ce3660d2d9c1d7..2a9a784173f4af 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -236,6 +236,8 @@ public class SessionVariable implements Serializable, Writable { public static final String SKIP_DELETE_BITMAP = "skip_delete_bitmap"; + public static final String SKIP_MISSING_VERSION = "skip_missing_version"; + public static final String ENABLE_NEW_SHUFFLE_HASH_METHOD = "enable_new_shuffle_hash_method"; public static final String ENABLE_PUSH_DOWN_NO_GROUP_AGG = "enable_push_down_no_group_agg"; @@ -646,6 +648,19 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = SKIP_DELETE_BITMAP) public boolean skipDeleteBitmap = false; + // This variable replace the original FE config `recover_with_skip_missing_version`. + // In some scenarios, all replicas of tablet are having missing versions, and the tablet is unable to recover. + // This config can control the behavior of query. When it is set to `true`, the query will ignore the + // visible version recorded in FE partition, use the replica version. If the replica on BE has missing versions, + // the query will directly skip this missing version, and only return the data of the existing versions. + // Besides, the query will always try to select the one with the highest lastSuccessVersion among all surviving + // BE replicas, so as to recover as much data as possible. + // You should only open it in the emergency scenarios mentioned above, only used for temporary recovery queries. 
+ // This variable conflicts with the use_fix_replica variable, when the use_fix_replica variable is not -1, + // this variable will not work. + @VariableMgr.VarAttr(name = SKIP_MISSING_VERSION) + public boolean skipMissingVersion = false; + // This variable is used to avoid FE fallback to the original parser. When we execute SQL in regression tests // for nereids, fallback will cause the Doris return the correct result although the syntax is unsupported // in nereids for some mistaken modification. You should set it on the @@ -1443,6 +1458,8 @@ public TQueryOptions toThrift() { tResult.setSkipDeleteBitmap(skipDeleteBitmap); + tResult.setSkipMissingVersion(skipMissingVersion); + tResult.setPartitionedHashJoinRowsThreshold(partitionedHashJoinRowsThreshold); return tResult; diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 6ebf0f4a3a89d7..c54d83acb6151a 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -190,6 +190,9 @@ struct TQueryOptions { 56: optional bool skip_delete_bitmap = false 57: optional i64 scan_queue_mem_limit + + // For emergency use, skip missing version when reading rowsets + 58: optional bool skip_missing_version = false; } diff --git a/regression-test/suites/query_p0/session_variable/test_skip_missing_version.groovy b/regression-test/suites/query_p0/session_variable/test_skip_missing_version.groovy new file mode 100644 index 00000000000000..c1f4c4464dc001 --- /dev/null +++ b/regression-test/suites/query_p0/session_variable/test_skip_missing_version.groovy @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +suite("test_skip_missing_version") { + def test_tbl = "test_skip_missing_version_tbl" + + sql """ DROP TABLE IF EXISTS ${test_tbl}""" + sql """ + CREATE TABLE ${test_tbl} ( + `k1` int(11) NULL, + `k2` char(5) NULL, + `k3` tinyint(4) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`, `k2`, `k3`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 5 + PROPERTIES ( + "replication_num"="1" + ); + """ + + sql """ INSERT INTO ${test_tbl} VALUES(1000, 'a', 10); """ + sql """ INSERT INTO ${test_tbl} VALUES(2000, 'b', 10); """ + + // This case cannot verify the results, but it can verify abnormalities after + // SET skip_missing_version=true + sql """ SET skip_missing_version=true """ + qt_select_all """ select * from ${test_tbl} order by k1 """ +} From 5d7edaae8f2067c8c47f00d2ee1437f33e305e67 Mon Sep 17 00:00:00 2001 From: lamb Date: Thu, 16 Nov 2023 01:39:09 +0800 Subject: [PATCH 2/3] add test --- .../query_p0/session_variable/test_skip_missing_version.out | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 regression-test/data/query_p0/session_variable/test_skip_missing_version.out diff --git a/regression-test/data/query_p0/session_variable/test_skip_missing_version.out b/regression-test/data/query_p0/session_variable/test_skip_missing_version.out new file mode 100644 index 00000000000000..37d08502236783 --- /dev/null +++ b/regression-test/data/query_p0/session_variable/test_skip_missing_version.out 
@@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select_all -- +1000 a 10 +2000 b 10 + From 46faa9d38c76c99f6cf5f4b2e01d2bf222be51b5 Mon Sep 17 00:00:00 2001 From: lamb Date: Thu, 16 Nov 2023 01:46:20 +0800 Subject: [PATCH 3/3] format --- be/src/vec/exec/scan/new_olap_scanner.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 0d59f122351b11..1b961af6da1109 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -90,9 +90,8 @@ Status NewOlapScanner::prepare(const TPaloScanRange& scan_range, // to prevent this case: when there are lots of olap scanners to run for example 10000 // the rowsets maybe compacted when the last olap scanner starts Version rd_version(0, _version); - Status acquire_reader_st = - _tablet->capture_rs_readers(rd_version, &_tablet_reader_params.rs_readers, - _state->skip_missing_version()); + Status acquire_reader_st = _tablet->capture_rs_readers( + rd_version, &_tablet_reader_params.rs_readers, _state->skip_missing_version()); if (!acquire_reader_st.ok()) { LOG(WARNING) << "fail to init reader.res=" << acquire_reader_st; std::stringstream ss;