Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 1 addition & 77 deletions be/src/olap/data_dir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,29 +69,14 @@ DataDir::DataDir(const std::string& path, int64_t capacity_bytes,
_cluster_id(-1),
_to_be_deleted(false),
_current_shard(0),
_test_file_read_buf(nullptr),
_test_file_write_buf(nullptr),
_meta(nullptr) {}

// Releases everything this DataDir owns.
DataDir::~DataDir() {
// The test buffers are obtained with posix_memalign() in init(), so they are
// released with free(); both start out as nullptr in the constructor.
free(_test_file_read_buf);
free(_test_file_write_buf);
// Heap objects owned through raw pointers.
delete _id_generator;
delete _meta;
}

Status DataDir::init() {
_rand_seed = static_cast<uint32_t>(time(NULL));
if (posix_memalign((void**)&_test_file_write_buf, DIRECT_IO_ALIGNMENT, TEST_FILE_BUF_SIZE) !=
0) {
LOG(WARNING) << "fail to allocate memory. size=" << TEST_FILE_BUF_SIZE;
return Status::InternalError("No memory");
}
if (posix_memalign((void**)&_test_file_read_buf, DIRECT_IO_ALIGNMENT, TEST_FILE_BUF_SIZE) !=
0) {
LOG(WARNING) << "fail to allocate memory. size=" << TEST_FILE_BUF_SIZE;
return Status::InternalError("No memory");
}
if (!FileUtils::check_exist(_path)) {
LOG(WARNING) << "opendir failed, path=" << _path;
return Status::InternalError("opendir failed");
Expand Down Expand Up @@ -315,68 +300,7 @@ void DataDir::health_check() {

// Probes the disk behind this data directory by running the shared
// read/write test on the file `_path + kTestFilePath`.
//
// The probe logic lives in read_write_test_file() (olap/utils); this method
// previously carried its own inline copy of that logic, which ended in
// `return res;` and left the delegating call after it unreachable. The method
// now only builds the test file path and delegates.
//
// Returns the status reported by read_write_test_file(): OLAP_SUCCESS when the
// file could be created, written, read back unchanged and removed; an error
// status otherwise.
OLAPStatus DataDir::_read_and_write_test_file() {
    const std::string test_file = _path + kTestFilePath;
    return read_write_test_file(test_file);
}

OLAPStatus DataDir::get_shard(uint64_t* shard) {
Expand Down
6 changes: 0 additions & 6 deletions be/src/olap/data_dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,6 @@ class DataDir {
TStorageMedium::type _storage_medium;
bool _is_used;

uint32_t _rand_seed;

std::string _file_system;
TabletManager* _tablet_manager;
TxnManager* _txn_manager;
Expand All @@ -173,11 +171,7 @@ class DataDir {
uint64_t _current_shard;
std::set<TabletInfo> _tablet_set;

static const size_t TEST_FILE_BUF_SIZE = 4096;
static const size_t DIRECT_IO_ALIGNMENT = 512;
static const uint32_t MAX_SHARD_NUM = 1024;
char* _test_file_read_buf;
char* _test_file_write_buf;

OlapMeta* _meta = nullptr;
RowsetIdGenerator* _id_generator = nullptr;
Expand Down
92 changes: 92 additions & 0 deletions be/src/olap/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,19 @@
#include <sys/stat.h>
#include <time.h>

#include <stdlib.h>
#include <unistd.h>
#include <string>
#include <cstring>
#include <cstdint>
#include <vector>

#include <boost/filesystem.hpp>
#include <boost/regex.hpp>
#include <errno.h>
#include <lz4/lz4.h>
#include "util/file_utils.h"
#include "olap/file_helper.h"

#ifdef DORIS_WITH_LZO
#include <lzo/lzo1c.h>
Expand All @@ -50,6 +55,7 @@
using std::string;
using std::set;
using std::vector;
using std::unique_ptr;

namespace doris {

Expand Down Expand Up @@ -973,6 +979,92 @@ OLAPStatus copy_file(const string& src, const string& dest) {

return res;
}
// Checks that the storage holding `test_file_path` can be written and read
// back correctly.
//
// Steps: allocate two aligned buffers, remove any existing file at
// `test_file_path`, create the file with O_DIRECT, write TEST_FILE_BUF_SIZE
// pseudo-random bytes, read them back, compare both buffers, close the file
// and delete it.
//
// Returns:
//   OLAP_SUCCESS              every step succeeded.
//   OLAP_ERR_MALLOC_ERROR     an aligned buffer could not be allocated.
//   OLAP_ERR_IO_ERROR         the file could not be accessed or deleted.
//   OLAP_ERR_TEST_FILE_ERROR  the bytes read back differ from the bytes written.
//   otherwise the status returned by the failing FileHandler call
//   (open_with_mode / pwrite / pread / close).
//
// If a step fails after the file has been created, the file is left in place;
// the next call deletes it before starting over.
OLAPStatus read_write_test_file(const string& test_file_path) {
    const size_t TEST_FILE_BUF_SIZE = 4096;
    const size_t DIRECT_IO_ALIGNMENT = 512;

    // Allocate both buffers before touching the filesystem, so that an
    // allocation failure cannot leave a freshly created test file behind.
    // The buffers are aligned because the file is opened with O_DIRECT below.
    void* raw_buf = nullptr;
    if (posix_memalign(&raw_buf, DIRECT_IO_ALIGNMENT, TEST_FILE_BUF_SIZE) != 0) {
        LOG(WARNING) << "fail to allocate write buffer memory. size=" << TEST_FILE_BUF_SIZE;
        return OLAP_ERR_MALLOC_ERROR;
    }
    unique_ptr<char, decltype(&std::free)> write_buff(static_cast<char*>(raw_buf), &std::free);

    raw_buf = nullptr;
    if (posix_memalign(&raw_buf, DIRECT_IO_ALIGNMENT, TEST_FILE_BUF_SIZE) != 0) {
        LOG(WARNING) << "fail to allocate read buffer memory. size=" << TEST_FILE_BUF_SIZE;
        return OLAP_ERR_MALLOC_ERROR;
    }
    unique_ptr<char, decltype(&std::free)> read_buff(static_cast<char*>(raw_buf), &std::free);

    // Start from a clean slate: drop a test file left over from an earlier run.
    if (access(test_file_path.c_str(), F_OK) == 0) {
        if (remove(test_file_path.c_str()) != 0) {
            // Snapshot errno right away; building the log line may change it.
            const int err = errno;
            char errmsg[64];
            LOG(WARNING) << "fail to delete test file. "
                         << "path=" << test_file_path
                         << ", errno=" << err << ", err=" << strerror_r(err, errmsg, 64);
            return OLAP_ERR_IO_ERROR;
        }
    } else {
        const int err = errno;
        // ENOENT simply means there is nothing to clean up.
        if (err != ENOENT) {
            char errmsg[64];
            LOG(WARNING) << "fail to access test file. "
                         << "path=" << test_file_path
                         << ", errno=" << err << ", err=" << strerror_r(err, errmsg, 64);
            return OLAP_ERR_IO_ERROR;
        }
    }

    OLAPStatus res = OLAP_SUCCESS;
    FileHandler file_handler;
    if ((res = file_handler.open_with_mode(test_file_path.c_str(),
                                           O_RDWR | O_CREAT | O_DIRECT,
                                           S_IRUSR | S_IWUSR)) != OLAP_SUCCESS) {
        LOG(WARNING) << "fail to create test file. path=" << test_file_path;
        return res;
    }

    // Fill the write buffer with pseudo-random bytes seeded from the clock.
    uint32_t rand_seed = static_cast<uint32_t>(time(nullptr));
    char* const write_bytes = write_buff.get();
    for (size_t i = 0; i < TEST_FILE_BUF_SIZE; ++i) {
        write_bytes[i] = static_cast<char>(rand_r(&rand_seed));
    }

    // NOTE(review): SEEK_SET is passed as the third argument of pwrite/pread,
    // presumably as the file offset (its value is 0) -- confirm against
    // FileHandler's declaration.
    if ((res = file_handler.pwrite(write_buff.get(), TEST_FILE_BUF_SIZE, SEEK_SET)) != OLAP_SUCCESS) {
        LOG(WARNING) << "fail to write test file. [file_name=" << test_file_path << "]";
        return res;
    }
    if ((res = file_handler.pread(read_buff.get(), TEST_FILE_BUF_SIZE, SEEK_SET)) != OLAP_SUCCESS) {
        LOG(WARNING) << "fail to read test file. [file_name=" << test_file_path << "]";
        return res;
    }

    // What was read back must be byte-identical to what was written.
    if (memcmp(write_buff.get(), read_buff.get(), TEST_FILE_BUF_SIZE) != 0) {
        LOG(WARNING) << "the test file write_buf and read_buf not equal, [file_name = " << test_file_path << "]";
        return OLAP_ERR_TEST_FILE_ERROR;
    }

    if ((res = file_handler.close()) != OLAP_SUCCESS) {
        LOG(WARNING) << "fail to close test file. [file_name=" << test_file_path << "]";
        return res;
    }

    // The probe file is temporary; failing to remove it counts as an I/O error.
    if (remove(test_file_path.c_str()) != 0) {
        const int err = errno;
        char errmsg[64];
        VLOG(3) << "fail to delete test file. [err='" << strerror_r(err, errmsg, 64)
                << "' path='" << test_file_path << "']";
        return OLAP_ERR_IO_ERROR;
    }
    return res;
}

bool check_datapath_rw(const string& path) {
if (!FileUtils::check_exist(path))
return false;
string file_path = path + "/.read_write_test_file";
try {
OLAPStatus res = read_write_test_file(file_path);
return res == OLAP_SUCCESS;
} catch (...) {
// do nothing
}
LOG(WARNING) << "error when try to read and write temp file under the data path and return false. [path=" << path << "]";
return false;
}



OLAPStatus copy_dir(const string &src_dir, const string &dst_dir) {
boost::filesystem::path src_path(src_dir.c_str());
Expand Down
4 changes: 4 additions & 0 deletions be/src/olap/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,10 @@ OLAPStatus copy_file(const std::string& src, const std::string& dest);

OLAPStatus copy_dir(const std::string &src_dir, const std::string &dst_dir);

// Returns true only if `path` exists and a read/write probe of the file
// ".read_write_test_file" directly under it succeeds; returns false otherwise.
bool check_datapath_rw(const std::string& path);

// Creates `test_file_path` (removing any existing file first), writes a block
// of pseudo-random bytes to it, reads the block back, verifies both match and
// deletes the file. Returns OLAP_SUCCESS when every step succeeds, otherwise
// an error status describing the step that failed.
OLAPStatus read_write_test_file(const std::string& test_file_path);

// Convert between two lists
template<typename T1, typename T2>
void static_cast_assign_vector(std::vector<T1>* v1, const std::vector<T2>& v2) {
Expand Down
19 changes: 19 additions & 0 deletions be/src/service/doris_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,25 @@ int main(int argc, char** argv) {
LOG(FATAL) << "parse config storage path failed, path=" << doris::config::storage_root_path;
exit(-1);
}
auto it = paths.begin();
for (;it != paths.end();) {
if (!doris::check_datapath_rw(it->path)) {
if (doris::config::ignore_broken_disk) {
LOG(WARNING) << "read write test file failed, path=" << it->path;
it = paths.erase(it);
} else {
LOG(FATAL) << "read write test file failed, path=" << it->path;
exit(-1);
}
} else {
++it;
}
}

if (paths.empty()) {
LOG(FATAL) << "All disks are broken, exit.";
exit(-1);
}

// initilize libcurl here to avoid concurrent initialization
auto curl_ret = curl_global_init(CURL_GLOBAL_ALL);
Expand Down
13 changes: 13 additions & 0 deletions docs/en/administrator-guide/config/be_config.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,19 @@ Since this is a brpc configuration, users can also modify this parameter directl
### `ignore_broken_disk`

### `ignore_load_tablet_failure`
When BE starts, it checks every path configured in `storage_root_path`.

- `ignore_broken_disk=true`

If a path does not exist, or files under it cannot be read or written (broken disk), that path is ignored. As long as at least one other path is usable, startup continues.

- `ignore_broken_disk=false`

If a path does not exist, or files under it cannot be read or written (broken disk), BE fails to start and exits.

The default value is `false`.

### `inc_rowset_expired_sec`

* Type: boolean
* Description: Whether to continue to start be when load tablet from header failed.
Expand Down
12 changes: 12 additions & 0 deletions docs/zh-CN/administrator-guide/config/be_config.md
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,18 @@ under the License.

### `ignore_broken_disk`

当 BE 启动时,会检查 `storage_root_path` 配置下的所有路径。

- `ignore_broken_disk=true`

如果路径不存在或路径下无法进行读写文件(坏盘),将忽略此路径,如果有其他可用路径则不中断启动。

- `ignore_broken_disk=false`

如果路径不存在或路径下无法进行读写文件(坏盘),将中断启动并失败退出。

默认值为 `false`。

### `inc_rowset_expired_sec`

### `index_stream_cache_capacity`
Expand Down