From 29b9c936d927e9a30b6e4d6c12b0dd35d9861578 Mon Sep 17 00:00:00 2001 From: huangkangping Date: Mon, 12 Aug 2019 15:20:34 +0800 Subject: [PATCH 1/4] Fix tablet restore api in BE(#1623) 1. fix bug when tablet with pending dir 2. add a shell script to support batch restore --- be/src/http/action/restore_tablet_action.cpp | 72 +++++++++-------- be/src/http/action/restore_tablet_action.h | 2 + bin/restore_tablet_tool.sh | 84 ++++++++++++++++++++ 3 files changed, 126 insertions(+), 32 deletions(-) create mode 100644 bin/restore_tablet_tool.sh diff --git a/be/src/http/action/restore_tablet_action.cpp b/be/src/http/action/restore_tablet_action.cpp index 745fa106bcb7d8..1464f498c4c31a 100644 --- a/be/src/http/action/restore_tablet_action.cpp +++ b/be/src/http/action/restore_tablet_action.cpp @@ -86,11 +86,11 @@ Status RestoreTabletAction::_handle(HttpRequest *req) { TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); - if (tablet.get() != nullptr) { + if (tablet = nullptr) { LOG(WARNING) << "find tablet. tablet_id=" << tablet_id << " schema_hash=" << schema_hash; return Status::InternalError("tablet already exists, can not restore."); } - std::string key = std::to_string(tablet_id) + "_" + std::to_string(schema_hash); + std::string key = tablet_id_str + "_" + schema_hash_str; { // check tablet_id + schema_hash already is restoring std::lock_guard l(_tablet_restore_lock); @@ -121,7 +121,7 @@ Status RestoreTabletAction::_reload_tablet( << ", signature: " << tablet_id; // remove tablet data path in data path // path: /roo_path/data/shard/tablet_id - std::string tablet_path = shard_path + "/" + std::to_string(tablet_id); + std::string tablet_path = shard_path + "/" + std::to_string(tablet_id) + "/" + std::to_string(schema_hash); LOG(INFO) << "remove tablet_path:" << tablet_path; Status s = FileUtils::remove_all(tablet_path); if (!s.ok()) { @@ -129,28 +129,19 @@ Status RestoreTabletAction::_reload_tablet( } return Status::InternalError("command executor load header failed"); } else { - LOG(INFO) << "load header success. status: " << res - << ", signature: " << tablet_id; - // remove tablet data path in trash - // path: /root_path/trash/time_label, because only one tablet path under time_label std::string trash_tablet_schema_hash_dir = ""; - { // get tablet path in trash std::lock_guard l(_tablet_restore_lock); trash_tablet_schema_hash_dir = _tablet_path_map[key]; } + LOG(INFO) << "load header success. status: " << res + << ", signature: " << tablet_id << ", from trash path:" << trash_tablet_schema_hash_dir + << " to shard path:" << shard_path; - boost::filesystem::path trash_tablet_schema_hash_path(trash_tablet_schema_hash_dir); - boost::filesystem::path time_label_path = trash_tablet_schema_hash_path.parent_path().parent_path(); - LOG(INFO) << "remove time label path:" << time_label_path.string(); - Status s = FileUtils::remove_all(time_label_path.string()); - if (!s.ok()) { - LOG(WARNING) << "remove time label path:" << time_label_path.string() << " failed"; - } return Status::OK(); } -} +} Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id, int32_t schema_hash) { // get latest tablet path in trash @@ -185,31 +176,48 @@ Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id, return s; } // create hard link for files in /root_path/data/shard/tablet_id/schema_hash + s = _create_hard_link_recursive(latest_tablet_path, restore_schema_hash_path); + if (!s.ok()) { + s = FileUtils::remove_all(restore_schema_hash_path); + if (!s.ok()) { + LOG(WARNING) << "remove invalid tablet path:" << restore_schema_hash_path << " failed"; + } + } + std::string restore_shard_path = store->get_absolute_shard_path(std::to_string(tablet_meta.shard_id())); + Status status = _reload_tablet(key, restore_shard_path, tablet_id, schema_hash); + return status; +} + +Status RestoreTabletAction::_create_hard_link_recursive(const std::string& src, const std::string& dst) { std::vector files; - s = FileUtils::scan_dir(latest_tablet_path, &files); + Status s = FileUtils::scan_dir(src, &files); if (!s.ok()) { - LOG(WARNING) << "scan dir failed:" << latest_tablet_path; + LOG(WARNING) << "scan dir failed:" << src; return s; } for (auto& file : files) { - std::string from = latest_tablet_path + "/" + file; - std::string to = restore_schema_hash_path + "/" + file; - int link_ret = link(from.c_str(), to.c_str()); - if (link_ret != 0) { - LOG(WARNING) << "link from:" << from - << " to:" << to << " failed, link ret:" << link_ret; - std::string restore_tablet_path = store->get_absolute_tablet_path(&tablet_meta, false); - LOG(WARNING) << "remove tablet_path:" << restore_tablet_path; - Status s = FileUtils::remove_all(restore_tablet_path); + std::string from = src + "/" + file; + std::string to = dst + "/" + file; + if (FileUtils::is_dir(from)) { + s = FileUtils::create_dir(to); + if (!s.ok()) { + LOG(WARNING) << "create path failed:" << to; + return s; + } + s = _create_hard_link_recursive(from, to); if (!s.ok()) { - LOG(WARNING) << "remove invalid tablet path:" << restore_tablet_path << " failed"; + return s; + } + } else { + int link_ret = link(from.c_str(), to.c_str()); + if (link_ret != 0) { + LOG(WARNING) << "link from:" << from + << " to:" << to << " failed, link ret:" << link_ret; + return Status::InternalError("create link path failed"); } - return Status::InternalError("create link path failed"); } } - std::string restore_shard_path = store->get_absolute_shard_path(std::to_string(tablet_meta.shard_id())); - Status status = _reload_tablet(key, restore_shard_path, tablet_id, schema_hash); - return status; + return Status::OK(); } bool RestoreTabletAction::_get_latest_tablet_path_from_trash( diff --git a/be/src/http/action/restore_tablet_action.h b/be/src/http/action/restore_tablet_action.h index d22686d4932dc6..9fce3110cb2477 100644 --- a/be/src/http/action/restore_tablet_action.h +++ b/be/src/http/action/restore_tablet_action.h @@ -51,6 +51,8 @@ class RestoreTabletAction : public HttpHandler { void _clear_key(const std::string& key); + Status _create_hard_link_recursive(const std::string& src, const std::string& dst); + private: ExecEnv* _exec_env; std::mutex _tablet_restore_lock; diff --git a/bin/restore_tablet_tool.sh b/bin/restore_tablet_tool.sh new file mode 100644 index 00000000000000..b2cbb801e0c339 --- /dev/null +++ b/bin/restore_tablet_tool.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# print usage +usage() { + echo " +Usage: $0 + Optional options: + -h | --host BE hostname + -p | --port BE http port + -t | --tablet_id tablet id to restore + -s | --schema_hash tablet related schema hash + -f | --file file with lines containing comma-split tablet id and schema hash + " + exit 1 +} + +OPTS=$(getopt \ + -n $0 \ + -o 'h:p:t:s:f:' \ + -l 'host:,port:,tablet_id:,schema_hash:,file:,help' \ + -- "$@") + +if [ $? != 0 ] ; then + usage +fi + +eval set -- "$OPTS" + +HOSTNAME=127.0.0.1 +HTTPPORT=8040 +TABLET_ID= +SCHEMA_HASH= +FILENAME= +USE_FILE=false + +while true; do + case "$1" in + -h|--host) HOSTNAME=$2 ; shift 2 ;; + -p|--port) HTTPPORT=$2 ; shift 2 ;; + -f|--file) FILENAME=$2 ; USE_FILE=true ; shift 2 ;; + -t|--tablet_id) TABLET_ID=$2 ; shift 2 ;; + -s|--schema_hash) SCHEMA_HASH=$2 ; shift 2 ;; + --help) usage ; shift ;; + --) shift ; break ;; + *) echo "Internal error!" ; exit 1 ;; + esac +done + +restore_tablet() { + echo "start to restore tablet id:"$3", schema hash:"$4 + curl -X POST "http://$1:$2/api/restore_tablet?tablet_id=$3&schema_hash=$4" + echo -e "\n" +} + +if [ $USE_FILE = true ] ; then + lines=`cat $FILENAME` + for line in $lines + do + # split the comma-split line + # format: tablet_id,schema_hash + fields=(${line/,/ }) + TABLET_ID=${fields[0]} + SCHEMA_HASH=${fields[1]} + restore_tablet $HOSTNAME $HTTPPORT $TABLET_ID $SCHEMA_HASH + done +else + restore_tablet $HOSTNAME $HTTPPORT $TABLET_ID $SCHEMA_HASH +fi \ No newline at end of file From 49a8bfd2ba1352a78610a0c6b46b9d0c72f5fc09 Mon Sep 17 00:00:00 2001 From: huangkangping Date: Mon, 12 Aug 2019 15:38:59 +0800 Subject: [PATCH 2/4] fix tablet assign bug --- be/src/http/action/restore_tablet_action.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/http/action/restore_tablet_action.cpp b/be/src/http/action/restore_tablet_action.cpp index 1464f498c4c31a..51de91bd51b7d7 100644 --- a/be/src/http/action/restore_tablet_action.cpp +++ b/be/src/http/action/restore_tablet_action.cpp @@ -86,7 +86,7 @@ Status RestoreTabletAction::_handle(HttpRequest *req) { TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); - if (tablet = nullptr) { + if (tablet != nullptr) { LOG(WARNING) << "find tablet. tablet_id=" << tablet_id << " schema_hash=" << schema_hash; return Status::InternalError("tablet already exists, can not restore."); } From 3ec1a312dbb07bfa93ac40856f4e6357e81b397b Mon Sep 17 00:00:00 2001 From: huangkangping Date: Mon, 12 Aug 2019 19:49:42 +0800 Subject: [PATCH 3/4] fix pr problem and add doc for restore tool --- be/src/http/action/restore_tablet_action.cpp | 20 +--- bin/restore_tablet_tool.sh | 84 --------------- .../operation/tablet-restore-tool.md | 64 +++++++++++ tools/restore_tablet_tool.sh | 101 ++++++++++++++++++ 4 files changed, 170 insertions(+), 99 deletions(-) delete mode 100644 bin/restore_tablet_tool.sh create mode 100644 docs/documentation/cn/administrator-guide/operation/tablet-restore-tool.md create mode 100644 tools/restore_tablet_tool.sh diff --git a/be/src/http/action/restore_tablet_action.cpp b/be/src/http/action/restore_tablet_action.cpp index 51de91bd51b7d7..e2be827d01b2b6 100644 --- a/be/src/http/action/restore_tablet_action.cpp +++ b/be/src/http/action/restore_tablet_action.cpp @@ -37,6 +37,7 @@ #include "olap/storage_engine.h" #include "olap/data_dir.h" #include "runtime/exec_env.h" +#include "gutil/strings/substitute.h" // for Substitute using boost::filesystem::path; @@ -121,7 +122,7 @@ Status RestoreTabletAction::_reload_tablet( << ", signature: " << tablet_id; // remove tablet data path in data path // path: /roo_path/data/shard/tablet_id - std::string tablet_path = shard_path + "/" + std::to_string(tablet_id) + "/" + std::to_string(schema_hash); + std::string tablet_path = strings::Substitute("$0/$1/$2", shard_path, tablet_id, schema_hash); LOG(INFO) << "remove tablet_path:" << tablet_path; Status s = FileUtils::remove_all(tablet_path); if (!s.ok()) { @@ -190,24 +191,13 @@ Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id, Status RestoreTabletAction::_create_hard_link_recursive(const std::string& src, const std::string& dst) { std::vector files; - Status s = FileUtils::scan_dir(src, &files); - if (!s.ok()) { - LOG(WARNING) << "scan dir failed:" << src; - return s; - } + RETURN_IF_ERROR(FileUtils::scan_dir(src, &files)); for (auto& file : files) { std::string from = src + "/" + file; std::string to = dst + "/" + file; if (FileUtils::is_dir(from)) { - s = FileUtils::create_dir(to); - if (!s.ok()) { - LOG(WARNING) << "create path failed:" << to; - return s; - } - s = _create_hard_link_recursive(from, to); - if (!s.ok()) { - return s; - } + RETURN_IF_ERROR(FileUtils::create_dir(to)); + RETURN_IF_ERROR(_create_hard_link_recursive(from, to)); } else { int link_ret = link(from.c_str(), to.c_str()); if (link_ret != 0) { diff --git a/bin/restore_tablet_tool.sh b/bin/restore_tablet_tool.sh deleted file mode 100644 index b2cbb801e0c339..00000000000000 --- a/bin/restore_tablet_tool.sh +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# print usage -usage() { - echo " -Usage: $0 - Optional options: - -h | --host BE hostname - -p | --port BE http port - -t | --tablet_id tablet id to restore - -s | --schema_hash tablet related schema hash - -f | --file file with lines containing comma-split tablet id and schema hash - " - exit 1 -} - -OPTS=$(getopt \ - -n $0 \ - -o 'h:p:t:s:f:' \ - -l 'host:,port:,tablet_id:,schema_hash:,file:,help' \ - -- "$@") - -if [ $? != 0 ] ; then - usage -fi - -eval set -- "$OPTS" - -HOSTNAME=127.0.0.1 -HTTPPORT=8040 -TABLET_ID= -SCHEMA_HASH= -FILENAME= -USE_FILE=false - -while true; do - case "$1" in - -h|--host) HOSTNAME=$2 ; shift 2 ;; - -p|--port) HTTPPORT=$2 ; shift 2 ;; - -f|--file) FILENAME=$2 ; USE_FILE=true ; shift 2 ;; - -t|--tablet_id) TABLET_ID=$2 ; shift 2 ;; - -s|--schema_hash) SCHEMA_HASH=$2 ; shift 2 ;; - --help) usage ; shift ;; - --) shift ; break ;; - *) echo "Internal error!" ; exit 1 ;; - esac -done - -restore_tablet() { - echo "start to restore tablet id:"$3", schema hash:"$4 - curl -X POST "http://$1:$2/api/restore_tablet?tablet_id=$3&schema_hash=$4" - echo -e "\n" -} - -if [ $USE_FILE = true ] ; then - lines=`cat $FILENAME` - for line in $lines - do - # split the comma-split line - # format: tablet_id,schema_hash - fields=(${line/,/ }) - TABLET_ID=${fields[0]} - SCHEMA_HASH=${fields[1]} - restore_tablet $HOSTNAME $HTTPPORT $TABLET_ID $SCHEMA_HASH - done -else - restore_tablet $HOSTNAME $HTTPPORT $TABLET_ID $SCHEMA_HASH -fi \ No newline at end of file diff --git a/docs/documentation/cn/administrator-guide/operation/tablet-restore-tool.md b/docs/documentation/cn/administrator-guide/operation/tablet-restore-tool.md new file mode 100644 index 00000000000000..0aae09f423312c --- /dev/null +++ b/docs/documentation/cn/administrator-guide/operation/tablet-restore-tool.md @@ -0,0 +1,64 @@ +# BE Tablet数据恢复工具 + +## 背景 + +用户在使用Doris的过程中,可能会发生因为一些误操作或者线上bug,导致一些有效的tablet被删除(包括元数据和数据)。为了防止在这些异常情况出现数据丢失,Doris提供了回收站机制,来保护用户数据。用户删除的tablet数据不会被直接删除,会被放在回收站中存储一段时间,在一段时间之后会有定时清理机制将过期的数据删除。回收站中的数据包括:tablet的data文件(.dat),tablet的索引文件(.idx)和tablet的元数据文件(.hdr)。数据将会存放在如下格式的路径: + +/root_path/trash/time_label/tablet_id/schema_hash/ + +其中, root path是用户配置的一块盘上be存储的根目录; +trash:是回收站的目录 +time_label: 时间标签,为了回收站中数据目录的唯一性,同时记录数据时间,使用时间标签作为子目录 + +当用户发现线上的数据被误删除,需要从回收站中恢复被删除的tablet,需要用到这个tablet数据恢复功能。BE提供http接口和restore_tablet_tool.sh脚本实现这个功能,支持单tablet操作(single mode)和批量操作模式(batch mode)。 +在single mode下,支持单个tablet的数据恢复。 +在batch mode下,支持批量tablet的数据恢复。 + +## 操作 + +### single mode + +#### http请求方式 + +BE中提供单个tablet数据恢复的http接口,接口如下: + +``` +curl -X POST "http://localhost:8040/api/restore_tablet?tablet_id=11111\&schema_hash=12345" +``` + + +成功的结果如下: +``` +{"status": "Success", "msg": "OK"} +``` + +失败的话,会返回相应的失败原因,一种可能的结果如下: +``` +{"status": "Failed", "msg": "create link path failed"} +``` + +#### 脚本方式 + +restore_tablet_tool.sh可用来实现单tablet数据恢复的功能。 + +``` +sh tools/restore_tablet_tool.sh -b "http://127.0.0.1:8040" -t 12345 -s 11111 +sh tools/restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --tablet_id 12345 --schema_hash 11111 +``` + +### batch mode + +批量恢复模式用于实现恢复多个tablet数据的功能。使用的时候需要预先将恢复的tablet id和schema hash按照逗号分隔的格式放在一个文件中,一个tablet一行。 +格式如下: +``` +12345,11111 +12346,11111 +12347,11111 +``` + +然后如下的命令进行恢复(假设文件名为:tablets.txt): + +``` +sh restore_tablet_tool.sh -b "http://127.0.0.1:8040" -f tablets.txt +sh restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --file tablets.txt +``` diff --git a/tools/restore_tablet_tool.sh b/tools/restore_tablet_tool.sh new file mode 100644 index 00000000000000..17c1e2ca4febec --- /dev/null +++ b/tools/restore_tablet_tool.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# print usage +usage() { + echo " +Description: + This script is used to restore the tablets from trash. It supports single mode + and batch mode. + In single mode, it will restore just one tablet. + In batch mode, it will restore all the tablets specified in file. The content + of the file is comma-split tablet id and schema hash, like the following: + 12345,11111 + 12346,11111 + 12347,11111 + +Usage: $0 + Optional options: + -h | --help print help info + -b | --backend backend http service, default: http://127.0.0.1/8040 + -t | --tablet_id tablet id to restore + -s | --schema_hash tablet related schema hash + -f | --file file with lines containing comma-split tablet id and schema hash + +Examples: + batch mode: + sh restore_tablet_tool.sh -b "http://127.0.0.1:8040" -f tablets.txt + sh restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --file tablets.txt + + single mode: + sh restore_tablet_tool.sh -b "http://127.0.0.1:8040" -t 12345 -s 11111 + sh restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --tablet_id 12345 --schema_hash 11111 + " + exit 1 +} + +OPTS=$(getopt \ + -n $0 \ + -o 'b:t:s:f:' \ + -l 'server:,tablet_id:,schema_hash:,file:,help' \ + -- "$@") + +if [ $? != 0 ] ; then + usage +fi + +eval set -- "$OPTS" + +SERVER="http://127.0.0.1/8040" +TABLET_ID= +SCHEMA_HASH= +FILENAME= +BATCH_MODE=false + +while true; do + case "$1" in + -b|--backend) SERVER=$2 ; shift 2 ;; + -f|--file) FILENAME=$2 ; BATCH_MODE=true ; shift 2 ;; + -t|--tablet_id) TABLET_ID=$2 ; shift 2 ;; + -s|--schema_hash) SCHEMA_HASH=$2 ; shift 2 ;; + -h|--help) usage ; shift ;; + --) shift ; break ;; + *) echo "Internal error!" ; exit 1 ;; + esac +done + +restore_tablet() { + echo "start to restore tablet id:"$2", schema hash:"$3 + curl -X POST "$1/api/restore_tablet?tablet_id=$2&schema_hash=$3" + echo -e "\n" +} + +if [ $BATCH_MODE = true ] ; then + lines=`cat $FILENAME` + for line in $lines + do + # split the comma-split line + # format: tablet_id,schema_hash + fields=(${line/,/ }) + TABLET_ID=${fields[0]} + SCHEMA_HASH=${fields[1]} + restore_tablet $SERVER $TABLET_ID $SCHEMA_HASH + done +else + restore_tablet $SERVER $TABLET_ID $SCHEMA_HASH +fi From 92c959a4b5aefba4ed2d00266ad69a477af02b2a Mon Sep 17 00:00:00 2001 From: huangkangping Date: Mon, 12 Aug 2019 20:18:03 +0800 Subject: [PATCH 4/4] fix bug --- be/src/http/action/restore_tablet_action.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/be/src/http/action/restore_tablet_action.cpp b/be/src/http/action/restore_tablet_action.cpp index e2be827d01b2b6..6d0e1a740c9544 100644 --- a/be/src/http/action/restore_tablet_action.cpp +++ b/be/src/http/action/restore_tablet_action.cpp @@ -179,10 +179,8 @@ Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id, // create hard link for files in /root_path/data/shard/tablet_id/schema_hash s = _create_hard_link_recursive(latest_tablet_path, restore_schema_hash_path); if (!s.ok()) { - s = FileUtils::remove_all(restore_schema_hash_path); - if (!s.ok()) { - LOG(WARNING) << "remove invalid tablet path:" << restore_schema_hash_path << " failed"; - } + RETURN_IF_ERROR(FileUtils::remove_all(restore_schema_hash_path)); + return s; } std::string restore_shard_path = store->get_absolute_shard_path(std::to_string(tablet_meta.shard_id())); Status status = _reload_tablet(key, restore_shard_path, tablet_id, schema_hash);