Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 30 additions & 34 deletions be/src/http/action/restore_tablet_action.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "olap/storage_engine.h"
#include "olap/data_dir.h"
#include "runtime/exec_env.h"
#include "gutil/strings/substitute.h" // for Substitute

using boost::filesystem::path;

Expand Down Expand Up @@ -86,11 +87,11 @@ Status RestoreTabletAction::_handle(HttpRequest *req) {

TabletSharedPtr tablet =
StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash);
if (tablet.get() != nullptr) {
if (tablet != nullptr) {
LOG(WARNING) << "find tablet. tablet_id=" << tablet_id << " schema_hash=" << schema_hash;
return Status::InternalError("tablet already exists, can not restore.");
}
std::string key = std::to_string(tablet_id) + "_" + std::to_string(schema_hash);
std::string key = tablet_id_str + "_" + schema_hash_str;
{
// check tablet_id + schema_hash already is restoring
std::lock_guard<std::mutex> l(_tablet_restore_lock);
Expand Down Expand Up @@ -121,36 +122,27 @@ Status RestoreTabletAction::_reload_tablet(
<< ", signature: " << tablet_id;
// remove tablet data path in data path
// path: /roo_path/data/shard/tablet_id
std::string tablet_path = shard_path + "/" + std::to_string(tablet_id);
std::string tablet_path = strings::Substitute("$0/$1/$2", shard_path, tablet_id, schema_hash);
LOG(INFO) << "remove tablet_path:" << tablet_path;
Status s = FileUtils::remove_all(tablet_path);
if (!s.ok()) {
LOG(WARNING) << "remove invalid tablet schema hash path:" << tablet_path << " failed";
}
return Status::InternalError("command executor load header failed");
} else {
LOG(INFO) << "load header success. status: " << res
<< ", signature: " << tablet_id;
// remove tablet data path in trash
// path: /root_path/trash/time_label, because only one tablet path under time_label
std::string trash_tablet_schema_hash_dir = "";

{
// get tablet path in trash
std::lock_guard<std::mutex> l(_tablet_restore_lock);
trash_tablet_schema_hash_dir = _tablet_path_map[key];
}
LOG(INFO) << "load header success. status: " << res
<< ", signature: " << tablet_id << ", from trash path:" << trash_tablet_schema_hash_dir
<< " to shard path:" << shard_path;

boost::filesystem::path trash_tablet_schema_hash_path(trash_tablet_schema_hash_dir);
boost::filesystem::path time_label_path = trash_tablet_schema_hash_path.parent_path().parent_path();
LOG(INFO) << "remove time label path:" << time_label_path.string();
Status s = FileUtils::remove_all(time_label_path.string());
if (!s.ok()) {
LOG(WARNING) << "remove time label path:" << time_label_path.string() << " failed";
}
return Status::OK();
}
}
}

Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id, int32_t schema_hash) {
// get latest tablet path in trash
Expand Down Expand Up @@ -185,31 +177,35 @@ Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id,
return s;
}
// create hard link for files in /root_path/data/shard/tablet_id/schema_hash
std::vector<std::string> files;
s = FileUtils::scan_dir(latest_tablet_path, &files);
s = _create_hard_link_recursive(latest_tablet_path, restore_schema_hash_path);
if (!s.ok()) {
LOG(WARNING) << "scan dir failed:" << latest_tablet_path;
RETURN_IF_ERROR(FileUtils::remove_all(restore_schema_hash_path));
return s;
}
std::string restore_shard_path = store->get_absolute_shard_path(std::to_string(tablet_meta.shard_id()));
Status status = _reload_tablet(key, restore_shard_path, tablet_id, schema_hash);
return status;
}

Status RestoreTabletAction::_create_hard_link_recursive(const std::string& src, const std::string& dst) {
std::vector<std::string> files;
RETURN_IF_ERROR(FileUtils::scan_dir(src, &files));
for (auto& file : files) {
std::string from = latest_tablet_path + "/" + file;
std::string to = restore_schema_hash_path + "/" + file;
int link_ret = link(from.c_str(), to.c_str());
if (link_ret != 0) {
LOG(WARNING) << "link from:" << from
<< " to:" << to << " failed, link ret:" << link_ret;
std::string restore_tablet_path = store->get_absolute_tablet_path(&tablet_meta, false);
LOG(WARNING) << "remove tablet_path:" << restore_tablet_path;
Status s = FileUtils::remove_all(restore_tablet_path);
if (!s.ok()) {
LOG(WARNING) << "remove invalid tablet path:" << restore_tablet_path << " failed";
std::string from = src + "/" + file;
std::string to = dst + "/" + file;
if (FileUtils::is_dir(from)) {
RETURN_IF_ERROR(FileUtils::create_dir(to));
RETURN_IF_ERROR(_create_hard_link_recursive(from, to));
} else {
int link_ret = link(from.c_str(), to.c_str());
if (link_ret != 0) {
LOG(WARNING) << "link from:" << from
<< " to:" << to << " failed, link ret:" << link_ret;
return Status::InternalError("create link path failed");
}
return Status::InternalError("create link path failed");
}
}
std::string restore_shard_path = store->get_absolute_shard_path(std::to_string(tablet_meta.shard_id()));
Status status = _reload_tablet(key, restore_shard_path, tablet_id, schema_hash);
return status;
return Status::OK();
}

bool RestoreTabletAction::_get_latest_tablet_path_from_trash(
Expand Down
2 changes: 2 additions & 0 deletions be/src/http/action/restore_tablet_action.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class RestoreTabletAction : public HttpHandler {

void _clear_key(const std::string& key);

Status _create_hard_link_recursive(const std::string& src, const std::string& dst);

private:
ExecEnv* _exec_env;
std::mutex _tablet_restore_lock;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# BE Tablet数据恢复工具

## 背景

用户在使用Doris的过程中,可能会因为一些误操作或者线上bug,导致一些有效的tablet被删除(包括元数据和数据)。为了防止在这些异常情况下出现数据丢失,Doris提供了回收站机制,来保护用户数据。用户删除的tablet数据不会被直接删除,而是会被放在回收站中存储一段时间,在一段时间之后会有定时清理机制将过期的数据删除。回收站中的数据包括:tablet的data文件(.dat),tablet的索引文件(.idx)和tablet的元数据文件(.hdr)。数据将会存放在如下格式的路径:

/root_path/trash/time_label/tablet_id/schema_hash/

其中, root path是用户配置的一块盘上be存储的根目录;
trash:是回收站的目录
time_label: 时间标签,为了回收站中数据目录的唯一性,同时记录数据时间,使用时间标签作为子目录

当用户发现线上的数据被误删除,需要从回收站中恢复被删除的tablet,需要用到这个tablet数据恢复功能。BE提供http接口和restore_tablet_tool.sh脚本实现这个功能,支持单tablet操作(single mode)和批量操作模式(batch mode)。
在single mode下,支持单个tablet的数据恢复。
在batch mode下,支持批量tablet的数据恢复。

## 操作

### single mode

#### http请求方式

BE中提供单个tablet数据恢复的http接口,接口如下:

```
curl -X POST "http://localhost:8040/api/restore_tablet?tablet_id=11111&schema_hash=12345"
```


成功的结果如下:
```
{"status": "Success", "msg": "OK"}
```

失败的话,会返回相应的失败原因,一种可能的结果如下:
```
{"status": "Failed", "msg": "create link path failed"}
```

#### 脚本方式

restore_tablet_tool.sh可用来实现单tablet数据恢复的功能。

```
sh tools/restore_tablet_tool.sh -b "http://127.0.0.1:8040" -t 12345 -s 11111
sh tools/restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --tablet_id 12345 --schema_hash 11111
```

### batch mode

批量恢复模式用于实现恢复多个tablet数据的功能。使用的时候需要预先将恢复的tablet id和schema hash按照逗号分隔的格式放在一个文件中,一个tablet一行。
格式如下:
```
12345,11111
12346,11111
12347,11111
```

然后使用如下命令进行恢复(假设文件名为:tablets.txt):

```
sh restore_tablet_tool.sh -b "http://127.0.0.1:8040" -f tablets.txt
sh restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --file tablets.txt
```
101 changes: 101 additions & 0 deletions tools/restore_tablet_tool.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# print usage
# Print the help text to stdout and terminate the script.
# Always exits with status 1, including when reached through -h/--help.
usage() {
    # An unquoted heredoc still expands $0 but keeps the double quotes in the
    # example commands literal (inside a double-quoted echo string they would
    # silently terminate and reopen the string and vanish from the output).
    cat <<EOF

Description:
This script is used to restore the tablets from trash. It supports single mode
and batch mode.
In single mode, it will restore just one tablet.
In batch mode, it will restore all the tablets specified in file. The content
of the file is comma-split tablet id and schema hash, like the following:
12345,11111
12346,11111
12347,11111

Usage: $0 <options>
Optional options:
-h | --help print help info
-b | --backend backend http service, default: http://127.0.0.1:8040
-t | --tablet_id tablet id to restore
-s | --schema_hash tablet related schema hash
-f | --file file with lines containing comma-split tablet id and schema hash

Examples:
batch mode:
sh restore_tablet_tool.sh -b "http://127.0.0.1:8040" -f tablets.txt
sh restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --file tablets.txt

single mode:
sh restore_tablet_tool.sh -b "http://127.0.0.1:8040" -t 12345 -s 11111
sh restore_tablet_tool.sh --backend "http://127.0.0.1:8040" --tablet_id 12345 --schema_hash 11111

EOF
    exit 1
}

# Parse the command line with getopt(1). Long options (-l) require an
# enhanced getopt implementation such as the one shipped with util-linux.
# The declared option names must match the case arms below: -h is listed in
# the short options and the long name is "backend" (not "server"), otherwise
# getopt rejects the documented -h / --backend flags before the loop runs.
if ! OPTS=$(getopt \
    -n "$0" \
    -o 'hb:t:s:f:' \
    -l 'backend:,tablet_id:,schema_hash:,file:,help' \
    -- "$@") ; then
    usage
fi

# Replace the positional parameters with getopt's normalized, quoted output.
eval set -- "$OPTS"

# Defaults; SERVER is the backend HTTP address (host:port).
SERVER="http://127.0.0.1:8040"
TABLET_ID=
SCHEMA_HASH=
FILENAME=
BATCH_MODE=false

while true; do
    case "$1" in
        -b|--backend) SERVER=$2 ; shift 2 ;;
        # Supplying a file switches the tool into batch mode.
        -f|--file) FILENAME=$2 ; BATCH_MODE=true ; shift 2 ;;
        -t|--tablet_id) TABLET_ID=$2 ; shift 2 ;;
        -s|--schema_hash) SCHEMA_HASH=$2 ; shift 2 ;;
        # usage terminates the script, so no shift is needed afterwards.
        -h|--help) usage ;;
        --) shift ; break ;;
        *) echo "Internal error!" ; exit 1 ;;
    esac
done

# Ask a backend to restore one tablet through its HTTP API.
#   $1: backend HTTP address, e.g. http://127.0.0.1:8040
#   $2: tablet id
#   $3: schema hash
# Prints a progress line, then whatever curl outputs, then two newlines.
# Returns curl's exit status so callers can detect transport failures.
restore_tablet() {
    echo "start to restore tablet id:$2, schema hash:$3"
    curl -X POST "$1/api/restore_tablet?tablet_id=$2&schema_hash=$3"
    restore_tablet_rc=$?
    # printf instead of `echo -e`: the latter is not understood by every sh.
    printf '\n\n'
    return "$restore_tablet_rc"
}

# Entry point: restore every tablet listed in FILENAME (batch mode) or the
# single tablet given by TABLET_ID / SCHEMA_HASH (single mode).
if [ "$BATCH_MODE" = true ] ; then
    if [ ! -r "$FILENAME" ] ; then
        echo "cannot read tablet list file: $FILENAME" >&2
        exit 1
    fi
    # Read one "tablet_id,schema_hash" entry per line. The list is read on
    # fd 3 so that nothing run inside the loop can consume it from stdin.
    # The `|| [ -n "$line" ]` keeps a final line without trailing newline.
    while read -r line <&3 || [ -n "$line" ] ; do
        case "$line" in
            '') continue ;;
            *,*) ;;
            *) echo "skip malformed line (expected tablet_id,schema_hash): $line" >&2
               continue ;;
        esac
        # Split on the first comma using plain parameter expansion, which
        # works in any POSIX sh (no bash arrays needed).
        TABLET_ID=${line%%,*}
        SCHEMA_HASH=${line#*,}
        if [ -z "$TABLET_ID" ] || [ -z "$SCHEMA_HASH" ] ; then
            echo "skip malformed line (expected tablet_id,schema_hash): $line" >&2
            continue
        fi
        restore_tablet "$SERVER" "$TABLET_ID" "$SCHEMA_HASH"
    done 3< "$FILENAME"
else
    # Single mode needs both identifiers; otherwise the request would be
    # sent with empty query parameters.
    if [ -z "$TABLET_ID" ] || [ -z "$SCHEMA_HASH" ] ; then
        echo "both --tablet_id and --schema_hash are required in single mode" >&2
        usage
    fi
    restore_tablet "$SERVER" "$TABLET_ID" "$SCHEMA_HASH"
fi