From 6b07f0de30424e8c2667f652cee5a6ae296135fd Mon Sep 17 00:00:00 2001 From: Gavin Chou Date: Thu, 17 Jul 2025 15:51:12 +0800 Subject: [PATCH] [fix](cloud) Fix roll-backed cumulative point of new tablet when doing schema change (#53402) The cumulative point in the job comes from the base tablet and may have been fetched a long time ago. Since the new tablet may have done cumulative compactions with alter_version as its initial cumulative point, the current cumulative point of the new tablet may be larger than job.alter_version. We need to keep the larger one to prevent the cumulative point from rolling back, which would break the basic assumption that the cumulative point is non-decreasing. --- cloud/src/meta-service/meta_service.cpp | 3 ++- cloud/src/meta-service/meta_service_job.cpp | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index 176882cb120394..87a6cf68e33915 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -1816,8 +1816,9 @@ void MetaServiceImpl::get_rowset(::google::protobuf::RpcController* controller, code = MetaServiceCode::INVALID_ARGUMENT; ss << "no valid compaction_cnt or cumulative_point given. 
req_bc_cnt=" << req_bc_cnt << ", bc_cnt=" << bc_cnt << ", req_cc_cnt=" << req_cc_cnt << ", cc_cnt=" << cc_cnt - << ", req_cp=" << req_cp << ", cp=" << cp; + << ", req_cp=" << req_cp << ", cp=" << cp << " tablet_id=" << tablet_id; msg = ss.str(); + LOG(WARNING) << msg; return; } auto versions = calc_sync_versions(req_bc_cnt, bc_cnt, req_cc_cnt, cc_cnt, req_cp, cp, diff --git a/cloud/src/meta-service/meta_service_job.cpp b/cloud/src/meta-service/meta_service_job.cpp index 000edc76a30395..6b60a0dd81c371 100644 --- a/cloud/src/meta-service/meta_service_job.cpp +++ b/cloud/src/meta-service/meta_service_job.cpp @@ -1204,7 +1204,7 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str //========================================================================== if (!schema_change.has_alter_version()) { code = MetaServiceCode::INVALID_ARGUMENT; - msg = "invalid alter_version"; + msg = "no alter_version for schema change job, tablet_id=" + std::to_string(tablet_id); return; } if (schema_change.alter_version() < 2) { @@ -1304,7 +1304,12 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str internal_get_tablet_stats(code, msg, txn.get(), instance_id, new_tablet_idx, *stats, detached_stats, config::snapshot_get_tablet_stats); // clang-format off - stats->set_cumulative_point(schema_change.output_cumulative_point()); + // ATTN: cumu point in job is from base tablet which may be fetched long time ago + // since the new tablet may have done cumu compactions with alter_version as initial cumu point + // current cumu point of new tablet may be larger than job.alter_version + // we need to keep the larger one in case of cumu point roll-back to + // break the basic assumptions of non-decreasing cumu point + stats->set_cumulative_point(std::max(schema_change.output_cumulative_point(), stats->cumulative_point())); stats->set_num_rows(stats->num_rows() + (schema_change.num_output_rows() - num_remove_rows)); 
stats->set_data_size(stats->data_size() + (schema_change.size_output_rowsets() - size_remove_rowsets)); stats->set_num_rowsets(stats->num_rowsets() + (schema_change.num_output_rowsets() - num_remove_rowsets));