Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ apachedoris/doris-dev build-env-1.3 c9665fbee395 5 days ago
> |---|---|---|
> | apachedoris/doris-dev:build-env | before [ff0dd0d](https://github.com/apache/incubator-doris/commit/ff0dd0d2daa588f18b6db56f947e813a56d8ec81) | 0.8.x, 0.9.x |
> | apachedoris/doris-dev:build-env-1.1 | [ff0dd0d](https://github.com/apache/incubator-doris/commit/ff0dd0d2daa588f18b6db56f947e813a56d8ec81) or later | 0.10.x or 0.11.x |
> | apachedoris/doris-dev:build-env-1.2 | [1648226](https://github.com/apache/incubator-doris/commit/1648226927c5b4e33f33ce2e12bf0e06369b7f6e) or later | 0.12.x or 0.13 |
> | apachedoris/doris-dev:build-env-1.3 | [ad67dd3](https://github.com/apache/incubator-doris/commit/ad67dd34a04c1ca960cff38e5b335b30fc7d559f) or later | 0.14.x or later |
> | apache/incubator-doris:build-env-1.2 | [4ef5a8c](https://github.com/apache/incubator-doris/commit/4ef5a8c8560351d7fff7ff8fd51c4c7a75e006a8) | 0.12.x - 0.14.0 |
> | apache/incubator-doris:build-env-1.3 | [ad67dd3](https://github.com/apache/incubator-doris/commit/ad67dd34a04c1ca960cff38e5b335b30fc7d559f) | later version |



Expand Down
57 changes: 42 additions & 15 deletions be/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -294,17 +294,20 @@ set_target_properties(aws-s2n PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib
add_library(minzip STATIC IMPORTED)
set_target_properties(minzip PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libminizip.a)

add_library(hdfs3 STATIC IMPORTED)
set_target_properties(hdfs3 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libhdfs3.a)
if (ARCH_AMD64)
# libhdfs3 only supports x86 or amd64
add_library(hdfs3 STATIC IMPORTED)
set_target_properties(hdfs3 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libhdfs3.a)

add_library(gsasl STATIC IMPORTED)
set_target_properties(gsasl PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libgsasl.a)
add_library(gsasl STATIC IMPORTED)
set_target_properties(gsasl PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libgsasl.a)

add_library(xml2 STATIC IMPORTED)
set_target_properties(xml2 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libxml2.a)
add_library(xml2 STATIC IMPORTED)
set_target_properties(xml2 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libxml2.a)

add_library(lzma STATIC IMPORTED)
set_target_properties(lzma PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/liblzma.a)
add_library(lzma STATIC IMPORTED)
set_target_properties(lzma PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/liblzma.a)
endif()

find_program(THRIFT_COMPILER thrift ${CMAKE_SOURCE_DIR}/bin)

Expand Down Expand Up @@ -434,7 +437,7 @@ set(WL_END_GROUP "-Wl,--end-group")

set(AWS_LIBS aws-sdk-s3 aws-sdk-core aws-checksums aws-c-io aws-c-event-stream aws-c-common aws-c-cal aws-s2n)

# Set Palo libraries
# Set Doris libraries
set(DORIS_LINK_LIBS
${WL_START_GROUP}
Agent
Expand All @@ -459,10 +462,10 @@ set(DORIS_LINK_LIBS
${WL_END_GROUP}
)

# Set thirdparty libraries
set(DORIS_DEPENDENCIES
${DORIS_DEPENDENCIES}
${WL_START_GROUP}
# COMMON_THIRDPARTY lists the third-party dependencies that can run on all platforms.
# When adding a new dependency, if you don't know whether it can run on all platforms,
# add it here first.
set(COMMON_THIRDPARTY
rocksdb
librdkafka_cpp
librdkafka
Expand Down Expand Up @@ -506,14 +509,35 @@ set(DORIS_DEPENDENCIES
odbc
cctz
minzip
${AWS_LIBS}
)

# Third-party dependencies for x86: the common set plus libraries that can only run on x86
set(X86_DEPENDENCIES
${COMMON_THIRDPARTY}
hdfs3
gsasl
xml2
lzma
${AWS_LIBS}
${WL_END_GROUP}
)

# Append the third-party libraries to DORIS_DEPENDENCIES, wrapped in a linker
# group (WL_START_GROUP / WL_END_GROUP).
#
# The x86-only libraries (hdfs3, gsasl, xml2, lzma) are imported in this file
# only when ARCH_AMD64 is set, so the same condition is used here to select
# X86_DEPENDENCIES. Every other architecture links just COMMON_THIRDPARTY,
# which avoids referencing libraries that were never imported.
if(ARCH_AMD64)
    set(DORIS_DEPENDENCIES
        ${DORIS_DEPENDENCIES}
        ${WL_START_GROUP}
        ${X86_DEPENDENCIES}
        ${WL_END_GROUP}
    )
else()
    set(DORIS_DEPENDENCIES
        ${DORIS_DEPENDENCIES}
        ${WL_START_GROUP}
        ${COMMON_THIRDPARTY}
        ${WL_END_GROUP}
    )
endif()

if(WITH_LZO)
set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES}
lzo
Expand All @@ -526,12 +550,15 @@ if (WITH_MYSQL)
)
endif()

message(STATUS "DORIS_DEPENDENCIES is ${DORIS_DEPENDENCIES}")

# Add all external dependencies. They should come after the Doris libraries
# that are already in DORIS_LINK_LIBS, followed by the extra linker flags below.
set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS}
${DORIS_DEPENDENCIES}
# Link GCC's libstdc++ and libgcc statically.
-static-libstdc++
-static-libgcc
# NOTE(review): presumably needed for std::filesystem on toolchains that ship it
# as a separate library -- confirm against the compiler version in the build image.
-lstdc++fs
)

if ("${CMAKE_BUILD_TYPE}" STREQUAL "BCC")
Expand Down
8 changes: 7 additions & 1 deletion be/src/exec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ set(EXEC_FILES
hash_join_node.cpp
hash_join_node_ir.cpp
hash_table.cpp
hdfs_file_reader.cpp
local_file_reader.cpp
merge_node.cpp
merge_join_node.cpp
Expand Down Expand Up @@ -107,6 +106,13 @@ set(EXEC_FILES
s3_writer.cpp
)

# The HDFS file reader is only compiled when ARCH_AMD64 is set; on other
# architectures it is left out of the source list entirely.
if (ARCH_AMD64)
    list(APPEND EXEC_FILES hdfs_file_reader.cpp)
endif()

if (WITH_MYSQL)
set(EXEC_FILES
${EXEC_FILES}
Expand Down
9 changes: 8 additions & 1 deletion be/src/exec/broker_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include "exec/buffered_reader.h"
#include "exec/decompressor.h"
#include "exec/exec_node.h"
#include "exec/hdfs_file_reader.h"
#include "exec/local_file_reader.h"
#include "exec/plain_text_line_reader.h"
#include "exec/s3_reader.h"
Expand All @@ -40,6 +39,10 @@
#include "runtime/tuple.h"
#include "util/utf8_check.h"

#if defined(__x86_64__)
#include "exec/hdfs_file_reader.h"
#endif

namespace doris {

BrokerScanner::BrokerScanner(RuntimeState* state, RuntimeProfile* profile,
Expand Down Expand Up @@ -163,12 +166,16 @@ Status BrokerScanner::open_file_reader() {
break;
}
case TFileType::FILE_HDFS: {
#if defined(__x86_64__)
BufferedReader* file_reader =
new BufferedReader(new HdfsFileReader(range.hdfs_params, range.path, start_offset),
config::remote_storage_read_buffer_mb * 1024 * 1024);
RETURN_IF_ERROR(file_reader->open());
_cur_file_reader = file_reader;
break;
#else
return Status::InternalError("HdfsFileReader do not support on non x86 platform");
#endif
}
case TFileType::FILE_BROKER: {
BrokerReader* broker_reader =
Expand Down
9 changes: 8 additions & 1 deletion be/src/exec/parquet_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,16 @@
#include "exprs/expr.h"
#include "exec/text_converter.h"
#include "exec/text_converter.hpp"
#include "exec/hdfs_file_reader.h"
#include "exec/local_file_reader.h"
#include "exec/broker_reader.h"
#include "exec/buffered_reader.h"
#include "exec/decompressor.h"
#include "exec/parquet_reader.h"

#if defined(__x86_64__)
#include "exec/hdfs_file_reader.h"
#endif

namespace doris {

ParquetScanner::ParquetScanner(RuntimeState* state, RuntimeProfile* profile,
Expand Down Expand Up @@ -128,9 +131,13 @@ Status ParquetScanner::open_next_reader() {
break;
}
case TFileType::FILE_HDFS: {
#if defined(__x86_64__)
file_reader.reset(new HdfsFileReader(
range.hdfs_params, range.path, range.start_offset));
break;
#else
return Status::InternalError("HdfsFileReader do not support on non x86 platform");
#endif
}
case TFileType::FILE_BROKER: {
int64_t file_size = 0;
Expand Down
41 changes: 29 additions & 12 deletions be/src/olap/rowset/segment_v2/binary_dict_page.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "common/logging.h"
#include "gutil/strings/substitute.h" // for Substitute
#include "olap/rowset/segment_v2/bitshuffle_page.h"
#include "runtime/mem_pool.h"
#include "util/slice.h" // for Slice

namespace doris {
Expand Down Expand Up @@ -238,8 +239,8 @@ Status BinaryDictPageDecoder::next_batch(size_t* n, ColumnBlockView* dst) {
// dictionary encoding
DCHECK(_parsed);
DCHECK(_dict_decoder != nullptr) << "dict decoder pointer is nullptr";

if (PREDICT_FALSE(*n == 0)) {
*n = 0;
return Status::OK();
}
Slice* out = reinterpret_cast<Slice*>(dst->data());
Expand All @@ -248,21 +249,37 @@ Status BinaryDictPageDecoder::next_batch(size_t* n, ColumnBlockView* dst) {
ColumnBlock column_block(_batch.get(), dst->column_block()->pool());
ColumnBlockView tmp_block_view(&column_block);
RETURN_IF_ERROR(_data_page_decoder->next_batch(n, &tmp_block_view));
for (int i = 0; i < *n; ++i) {
const auto len = *n;

size_t mem_len[len];
for (int i = 0; i < len; ++i) {
int32_t codeword = *reinterpret_cast<const int32_t*>(column_block.cell_ptr(i));
// get the string from the dict decoder
Slice element = _dict_decoder->string_at_index(codeword);
if (element.size > 0) {
char* destination = (char*)dst->column_block()->pool()->allocate(element.size);
if (destination == nullptr) {
return Status::MemoryAllocFailed(
strings::Substitute("memory allocate failed, size:$0", element.size));
}
element.relocate(destination);
}
*out = element;
*out = _dict_decoder->string_at_index(codeword);
mem_len[i] = out->size;
out++;
}

// use SIMD instruction to speed up call function `RoundUpToPowerOfTwo`
auto mem_size = 0;
for (int i = 0; i < len; ++i) {
mem_len[i] = BitUtil::RoundUpToPowerOf2Int32(mem_len[i], MemPool::DEFAULT_ALIGNMENT);
mem_size += mem_len[i];
}

// allocate a batch of memory and do memcpy
out = reinterpret_cast<Slice*>(dst->data());
char* destination = (char*)dst->column_block()->pool()->allocate(mem_size);
if (destination == nullptr) {
return Status::MemoryAllocFailed(
strings::Substitute("memory allocate failed, size:$0", mem_size));
}
for (int i = 0; i < len; ++i) {
out->relocate(destination);
destination += mem_len[i];
++out;
}

return Status::OK();
}

Expand Down
34 changes: 25 additions & 9 deletions be/src/olap/rowset/segment_v2/binary_plain_page.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#pragma once

#include "common/logging.h"
#include "gutil/strings/substitute.h"
#include "olap/olap_common.h"
#include "olap/rowset/segment_v2/options.h"
#include "olap/rowset/segment_v2/page_builder.h"
Expand Down Expand Up @@ -193,18 +194,33 @@ class BinaryPlainPageDecoder : public PageDecoder {
*n = 0;
return Status::OK();
}
size_t max_fetch = std::min(*n, static_cast<size_t>(_num_elems - _cur_idx));
const size_t max_fetch = std::min(*n, static_cast<size_t>(_num_elems - _cur_idx));

Slice* out = reinterpret_cast<Slice*>(dst->data());

size_t mem_len[max_fetch];
for (size_t i = 0; i < max_fetch; i++, out++, _cur_idx++) {
Slice elem(string_at_index(_cur_idx));
out->size = elem.size;
if (elem.size != 0) {
out->data =
reinterpret_cast<char*>(dst->pool()->allocate(elem.size * sizeof(uint8_t)));
memcpy(out->data, elem.data, elem.size);
}
*out = string_at_index(_cur_idx);
mem_len[i] = out->size;
}

// use SIMD instruction to speed up call function `RoundUpToPowerOfTwo`
auto mem_size = 0;
for (int i = 0; i < max_fetch; ++i) {
mem_len[i] = BitUtil::RoundUpToPowerOf2Int32(mem_len[i], MemPool::DEFAULT_ALIGNMENT);
mem_size += mem_len[i];
}

// allocate a batch of memory and do memcpy
out = reinterpret_cast<Slice*>(dst->data());
char* destination = (char*)dst->column_block()->pool()->allocate(mem_size);
if (destination == nullptr) {
return Status::MemoryAllocFailed(
strings::Substitute("memory allocate failed, size:$0", mem_size));
}
for (int i = 0; i < max_fetch; ++i) {
out->relocate(destination);
destination += mem_len[i];
++out;
}

*n = max_fetch;
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/bloom_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ uint32_t BloomFilter::_optimal_bit_num(uint64_t n, double fpp) {
}

// Get closest power of 2 if bits is not power of 2.
if ((num_bits && (num_bits - 1)) != 0) {
if ((num_bits & (num_bits - 1)) != 0) {
num_bits = 1 << ser::used_bits(num_bits);
}
if (num_bits < MINIMUM_BYTES << 3) {
Expand Down
2 changes: 1 addition & 1 deletion be/src/runtime/mem_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ class MemPool {

MemTracker* mem_tracker() { return mem_tracker_; }

static const int DEFAULT_ALIGNMENT = 8;
static constexpr int DEFAULT_ALIGNMENT = 8;

private:
friend class MemPoolTest;
Expand Down
6 changes: 6 additions & 0 deletions be/src/util/bit_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,12 @@ class BitUtil {
return (value + (factor - 1)) & ~(factor - 1);
}

// Rounds 'value' up to the nearest multiple of 'factor'. 'factor' must be a
// non-zero power of two, which is verified with a DCHECK. Despite the "Int32"
// suffix, both the arguments and the result are size_t.
// The computation is a branch-free add-and-mask; the original note says this
// variant exists to make the computation SIMD-friendly.
static inline size_t RoundUpToPowerOf2Int32(size_t value, size_t factor) {
    DCHECK((factor > 0) && ((factor & (factor - 1)) == 0));
    const size_t low_bits_mask = factor - 1;
    const size_t bumped = value + low_bits_mask;
    return bumped & ~low_bits_mask;
}

// Returns the ceil of value/divisor
static inline int Ceil(int value, int divisor) {
return value / divisor + (value % divisor != 0);
Expand Down
2 changes: 2 additions & 0 deletions docs/.vuepress/sidebar/en.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ module.exports = [
directoryPath: "installing/",
children: [
"compilation",
"compilation-arm",
"install-deploy",
"upgrade",
],
Expand Down Expand Up @@ -483,6 +484,7 @@ module.exports = [
"SHOW ALTER",
"SHOW BACKUP",
"SHOW CREATE FUNCTION",
"SHOW CREATE ROUTINE LOAD",
"SHOW DATA",
"SHOW DATABASES",
"SHOW DELETE",
Expand Down
2 changes: 2 additions & 0 deletions docs/.vuepress/sidebar/zh-CN.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ module.exports = [
directoryPath: "installing/",
children: [
"compilation",
"compilation-arm",
"install-deploy",
"upgrade",
],
Expand Down Expand Up @@ -486,6 +487,7 @@ module.exports = [
"SHOW ALTER",
"SHOW BACKUP",
"SHOW CREATE FUNCTION",
"SHOW CREATE ROUTINE LOAD",
"SHOW DATA",
"SHOW DATABASES",
"SHOW DELETE",
Expand Down
Loading