diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index d56da2a7e15a04..4c0f2817a901eb 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -466,7 +466,7 @@ else() endif() set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS} - -lrt -lbfd -liberty -lc -lm -ldl -lltdl -pthread + -lrt -lbfd -liberty -lc -lm -ldl -pthread ) # Set libraries for test @@ -571,6 +571,7 @@ install(FILES install(FILES ${BASE_DIR}/../conf/be.conf + ${BASE_DIR}/../conf/odbcinst.ini DESTINATION ${OUTPUT_DIR}/conf) install(DIRECTORY diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index 39ab4c86deee3f..916b3d5ce72392 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ -88,12 +88,14 @@ set(EXEC_FILES partitioned_hash_table_ir.cc partitioned_aggregation_node.cc partitioned_aggregation_node_ir.cc + odbc_scan_node.cpp local_file_writer.cpp broker_writer.cpp parquet_scanner.cpp parquet_reader.cpp parquet_writer.cpp orc_scanner.cpp + odbc_scanner.cpp json_scanner.cpp assert_num_rows_node.cpp ) @@ -108,7 +110,7 @@ endif() if (WITH_LZO) set(EXEC_FILES ${EXEC_FILES} - lzo_decompressor.cpp + lzo_decompressor.cpp ) endif() diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 5d94c8cbce03ad..4713864dedd75f 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -41,6 +41,7 @@ #include "exec/mysql_scan_node.h" #include "exec/olap_rewrite_node.h" #include "exec/olap_scan_node.h" +#include "exec/odbc_scan_node.h" #include "exec/partitioned_aggregation_node.h" #include "exec/repeat_node.h" #include "exec/schema_scan_node.h" @@ -58,6 +59,7 @@ #include "runtime/runtime_state.h" #include "util/debug_util.h" #include "util/runtime_profile.h" +#include "odbc_scan_node.h" namespace doris { @@ -350,6 +352,9 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN #else return Status::InternalError("Don't support MySQL table, you should rebuild Doris with WITH_MYSQL option ON"); #endif + case TPlanNodeType::ODBC_SCAN_NODE: + *node = pool->add(new OdbcScanNode(pool, tnode, descs)); + return Status::OK(); case TPlanNodeType::ES_SCAN_NODE: *node = pool->add(new EsScanNode(pool, tnode, descs)); diff --git a/be/src/exec/odbc_scan_node.cpp b/be/src/exec/odbc_scan_node.cpp new file mode 100644 index 00000000000000..69949e4b41256c --- /dev/null +++ b/be/src/exec/odbc_scan_node.cpp @@ -0,0 +1,271 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "odbc_scan_node.h" + +#include + +#include "exec/text_converter.hpp" +#include "gen_cpp/PlanNodes_types.h" +#include "runtime/runtime_state.h" +#include "runtime/row_batch.h" +#include "runtime/string_value.h" +#include "runtime/tuple_row.h" +#include "util/runtime_profile.h" + +namespace doris { + +OdbcScanNode::OdbcScanNode(ObjectPool* pool, const TPlanNode& tnode, + const DescriptorTbl& descs) + : ScanNode(pool, tnode, descs), + _is_init(false), + _table_name(tnode.odbc_scan_node.table_name), + _tuple_id(tnode.odbc_scan_node.tuple_id), + _columns(tnode.odbc_scan_node.columns), + _filters(tnode.odbc_scan_node.filters), + _tuple_desc(nullptr) { +} + +OdbcScanNode::~OdbcScanNode() { +} + +Status OdbcScanNode::prepare(RuntimeState* state) { + VLOG(1) << "OdbcScanNode::Prepare"; + + if (_is_init) { + return Status::OK(); + } + + if (NULL == state) { + return Status::InternalError("input pointer is NULL."); + } + + RETURN_IF_ERROR(ScanNode::prepare(state)); + // get tuple desc + _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); + + if (NULL == _tuple_desc) { + return Status::InternalError("Failed to get tuple descriptor."); + } + + _slot_num = _tuple_desc->slots().size(); + // get odbc table info + const ODBCTableDescriptor* odbc_table = + static_cast(_tuple_desc->table_desc()); + + if (NULL == odbc_table) { + return Status::InternalError("odbc table pointer is NULL."); + } + + _odbc_param.host = odbc_table->host(); + _odbc_param.port = odbc_table->port(); + _odbc_param.user = odbc_table->user(); + _odbc_param.passwd = odbc_table->passwd(); + _odbc_param.db = odbc_table->db(); + _odbc_param.drivier = odbc_table->driver(); + _odbc_param.type = odbc_table->type(); + _odbc_param.tuple_desc = _tuple_desc; + + _odbc_scanner.reset(new (std::nothrow)ODBCScanner(_odbc_param)); + + if (_odbc_scanner.get() == nullptr) { + return Status::InternalError("new a odbc scanner failed."); + } + + _tuple_pool.reset(new(std::nothrow) MemPool(mem_tracker().get())); + + if (_tuple_pool.get() == NULL) { + return Status::InternalError("new a mem pool failed."); + } + + _text_converter.reset(new(std::nothrow) TextConverter('\\')); + + if (_text_converter.get() == NULL) { + return Status::InternalError("new a text convertor failed."); + } + + _is_init = true; + + return Status::OK(); +} + +Status OdbcScanNode::open(RuntimeState* state) { + RETURN_IF_ERROR(ExecNode::open(state)); + VLOG(1) << "OdbcScanNode::Open"; + + if (NULL == state) { + return Status::InternalError("input pointer is NULL."); + } + + if (!_is_init) { + return Status::InternalError("used before initialize."); + } + + RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN)); + RETURN_IF_CANCELLED(state); + SCOPED_TIMER(_runtime_profile->total_time_counter()); + RETURN_IF_ERROR(_odbc_scanner->open()); + RETURN_IF_ERROR(_odbc_scanner->query(_table_name, _columns, _filters)); + // check materialize slot num + + return Status::OK(); +} + +Status OdbcScanNode::write_text_slot(char* value, int value_length, + SlotDescriptor* slot, RuntimeState* state) { + if (!_text_converter->write_slot(slot, _tuple, value, value_length, + true, false, _tuple_pool.get())) { + std::stringstream ss; + ss << "fail to convert odbc value '" << value << "' TO " << slot->type(); + return Status::InternalError(ss.str()); + } + + return Status::OK(); +} + +Status OdbcScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { + VLOG(1) << "OdbcScanNode::GetNext"; + + if (NULL == state || NULL == row_batch || NULL == eos) { + return Status::InternalError("input is NULL pointer"); + } + + if (!_is_init) { + return Status::InternalError("used before initialize."); + } + + RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT)); + RETURN_IF_CANCELLED(state); + SCOPED_TIMER(_runtime_profile->total_time_counter()); + SCOPED_TIMER(materialize_tuple_timer()); + + if (reached_limit()) { + *eos = true; + return Status::OK(); + } + + // create new tuple buffer for row_batch + int tuple_buffer_size = row_batch->capacity() * _tuple_desc->byte_size(); + void* tuple_buffer = _tuple_pool->allocate(tuple_buffer_size); + + if (NULL == tuple_buffer) { + return Status::InternalError("Allocate memory failed."); + } + + _tuple = reinterpret_cast(tuple_buffer); + // Indicates whether there are more rows to process. Set in _odbc_scanner.next(). + bool odbc_eos = false; + + while (true) { + RETURN_IF_CANCELLED(state); + + if (reached_limit() || row_batch->is_full()) { + // hang on to last allocated chunk in pool, we'll keep writing into it in the + // next get_next() call + row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), !reached_limit()); + *eos = reached_limit(); + return Status::OK(); + } + + RETURN_IF_ERROR(_odbc_scanner->get_next_row(&odbc_eos)); + + if (odbc_eos) { + row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), false); + *eos = true; + return Status::OK(); + } + + int row_idx = row_batch->add_row(); + TupleRow* row = row_batch->get_row(row_idx); + // scan node is the first tuple of tuple row + row->set_tuple(0, _tuple); + memset(_tuple, 0, _tuple_desc->num_null_bytes()); + int j = 0; + + for (int i = 0; i < _slot_num; ++i) { + auto slot_desc = _tuple_desc->slots()[i]; + // because the fe planner filter the non_materialize column + if (!slot_desc->is_materialized()) { + continue; + } + + const auto& column_data = _odbc_scanner->get_column_data(j); + if (column_data.strlen_or_ind == SQL_NULL_DATA) { + if (slot_desc->is_nullable()) { + _tuple->set_null(slot_desc->null_indicator_offset()); + } else { + std::stringstream ss; + ss << "nonnull column contains NULL. table=" << _table_name + << ", column=" << slot_desc->col_name(); + return Status::InternalError(ss.str()); + } + } else if (column_data.strlen_or_ind > column_data.buffer_length) { + std::stringstream ss; + ss << "nonnull column contains NULL. table=" << _table_name + << ", column=" << slot_desc->col_name(); + return Status::InternalError(ss.str()); + } else { + RETURN_IF_ERROR( + write_text_slot(static_cast(column_data.target_value_ptr), column_data.strlen_or_ind, slot_desc, state)); + } + j++; + } + + ExprContext* const* ctxs = &_conjunct_ctxs[0]; + int num_ctxs = _conjunct_ctxs.size(); + + // ODBC scanner can not filter conjunct with function, need check conjunct again. + if (ExecNode::eval_conjuncts(ctxs, num_ctxs, row)) { + row_batch->commit_last_row(); + ++_num_rows_returned; + COUNTER_SET(_rows_returned_counter, _num_rows_returned); + char* new_tuple = reinterpret_cast(_tuple); + new_tuple += _tuple_desc->byte_size(); + _tuple = reinterpret_cast(new_tuple); + } + } + + return Status::OK(); +} + +Status OdbcScanNode::close(RuntimeState* state) { + if (is_closed()) { + return Status::OK(); + } + RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::CLOSE)); + SCOPED_TIMER(_runtime_profile->total_time_counter()); + + _tuple_pool.reset(); + + return ExecNode::close(state); +} + +void OdbcScanNode::debug_string(int indentation_level, stringstream* out) const { + *out << string(indentation_level * 2, ' '); + *out << "OdbcScanNode(tupleid=" << _tuple_id << " table=" << _table_name; + *out << ")" << std::endl; + + for (int i = 0; i < _children.size(); ++i) { + _children[i]->debug_string(indentation_level + 1, out); + } +} + +Status OdbcScanNode::set_scan_ranges(const vector& scan_ranges) { + return Status::OK(); +} + +} diff --git a/be/src/exec/odbc_scan_node.h b/be/src/exec/odbc_scan_node.h new file mode 100644 index 00000000000000..48e28aab87677a --- /dev/null +++ b/be/src/exec/odbc_scan_node.h @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_QUERY_EXEC_ODBC_SCAN_NODE_H +#define DORIS_BE_SRC_QUERY_EXEC_ODBC_SCAN_NODE_H + +#include + +#include "runtime/descriptors.h" +#include "exec/scan_node.h" +#include "exec/odbc_scanner.h" + +namespace doris { + +class TextConverter; +class Tuple; +class TupleDescriptor; +class RuntimeState; +class MemPool; +class Status; + +class OdbcScanNode : public ScanNode { +public: + OdbcScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); + ~OdbcScanNode(); + + // initialize _odbc_scanner, and create _text_converter. + virtual Status prepare(RuntimeState* state); + + // Start ODBC scan using _odbc_scanner. + virtual Status open(RuntimeState* state); + + // Fill the next row batch by calling next() on the _odbc_scanner, + // converting text data in ODBC cells to binary data. + virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); + + // Close the _odbc_scanner, and report errors. + virtual Status close(RuntimeState* state); + + // No use + virtual Status set_scan_ranges(const std::vector& scan_ranges); + +protected: + // Write debug string of this into out. + virtual void debug_string(int indentation_level, std::stringstream* out) const; + +private: + // Writes a slot in _tuple from an MySQL value containing text data. + // The Odbc value is converted into the appropriate target type. + Status write_text_slot(char* value, int value_length, SlotDescriptor* slot, + RuntimeState* state); + + bool _is_init; + // Name of Odbc table + std::string _table_name; + + // Tuple id resolved in prepare() to set _tuple_desc; + TupleId _tuple_id; + + // select columns + std::vector _columns; + // where clause + std::vector _filters; + + // Descriptor of tuples read from ODBC table. + const TupleDescriptor* _tuple_desc; + // Tuple index in tuple row. + int _slot_num; + // Pool for allocating tuple data, including all varying-length slots. + std::unique_ptr _tuple_pool; + + // Scanner of ODBC. + std::unique_ptr _odbc_scanner; + ODBCScannerParam _odbc_param; + // Helper class for converting text to other types; + std::unique_ptr _text_converter; + // Current tuple. + Tuple* _tuple = nullptr; +}; +} + +#endif diff --git a/be/src/exec/odbc_scanner.cpp b/be/src/exec/odbc_scanner.cpp new file mode 100644 index 00000000000000..94f4876e1c63d0 --- /dev/null +++ b/be/src/exec/odbc_scanner.cpp @@ -0,0 +1,262 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "exec/odbc_scanner.h" +#include "common/logging.h" +#include "runtime/primitive_type.h" + +#define ODBC_DISPOSE(h, ht, x, op) { auto rc = x;\ + if (rc != SQL_SUCCESS && rc != SQL_SUCCESS_WITH_INFO) \ + { \ + return error_status(op, handle_diagnostic_record(h, ht, rc)); \ + } \ + if (rc == SQL_ERROR) \ + { \ + auto err_msg = std::string("Errro in") + std::string(op); \ + return Status::InternalError(err_msg.c_str()); \ + } \ + } \ + +static constexpr uint32_t SMALL_COLUMN_SIZE_BUFFER = 100; +// Now we only treat HLL, CHAR, VARCHAR as big column +static constexpr uint32_t BIG_COLUMN_SIZE_BUFFER = 65535; + +static std::u16string utf8_to_wstring(const std::string& str) { + std::wstring_convert, char16_t> utf8_ucs2_cvt; + return utf8_ucs2_cvt.from_bytes(str); +} + +namespace doris { + +ODBCScanner::ODBCScanner(const ODBCScannerParam& param) + : _connect_string(build_connect_string(param)), + _type(param.type), + _tuple_desc(param.tuple_desc), + _is_open(false), + _field_num(0), + _row_count(0), + _env(nullptr), + _dbc(nullptr), + _stmt(nullptr) { +} + +ODBCScanner::~ODBCScanner() { + if (_stmt != nullptr) { + SQLFreeHandle(SQL_HANDLE_STMT, _stmt); + } + + if (_dbc != nullptr) { + SQLDisconnect(_dbc); + SQLFreeHandle(SQL_HANDLE_DBC, _dbc); + } + + if (_env != nullptr) { + SQLFreeHandle(SQL_HANDLE_ENV, _env); + } +} + +Status ODBCScanner::open() { + if (_is_open) { + LOG(INFO) << "this scanner already opened"; + return Status::OK(); + } + + // Allocate an environment + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &_env) != SQL_SUCCESS) { + return Status::InternalError("alloc env failed"); + } + // We want ODBC 3 support + ODBC_DISPOSE(_env, SQL_HANDLE_ENV, SQLSetEnvAttr(_env, SQL_ATTR_ODBC_VERSION, (void *) SQL_OV_ODBC3, 0), "set env attr"); + // Allocate a connection handle + ODBC_DISPOSE(_env, SQL_HANDLE_ENV, SQLAllocHandle(SQL_HANDLE_DBC, _env, &_dbc), "alloc dbc"); + // Connect to the Database + ODBC_DISPOSE(_dbc, SQL_HANDLE_DBC, SQLDriverConnect(_dbc, NULL, (SQLCHAR*)_connect_string.c_str(), SQL_NTS, + NULL, 0, NULL, SQL_DRIVER_COMPLETE_REQUIRED), "driver connect"); + + LOG(INFO) << "connect success:" << _connect_string.substr(0, _connect_string.find("Pwd=")); + + _is_open = true; + return Status::OK(); +} + +Status ODBCScanner::query(const std::string& query) { + if (!_is_open) { + return Status::InternalError( "Query before open."); + } + + // Allocate a statement handle + ODBC_DISPOSE(_dbc, SQL_HANDLE_DBC, SQLAllocHandle(SQL_HANDLE_STMT, _dbc, &_stmt), "alloc statement"); + + // Translate utf8 string to utf16 to use unicode codeing + auto wquery = utf8_to_wstring(query); + ODBC_DISPOSE(_stmt, SQL_HANDLE_STMT, SQLExecDirectW(_stmt, (SQLWCHAR*)(wquery.c_str()), SQL_NTS), "exec direct"); + + // How many columns are there */ + ODBC_DISPOSE(_stmt, SQL_HANDLE_STMT, SQLNumResultCols(_stmt, &_field_num), "count num colomn"); + + LOG(INFO) << "execute success:" << query << " column count:" << _field_num; + + // check materialize num equal _field_num + int materialize_num = 0; + for (int i = 0; i < _tuple_desc->slots().size(); ++i) { + if (_tuple_desc->slots()[i]->is_materialized()) { + materialize_num++; + } + } + if (_field_num != materialize_num) { + return Status::InternalError("input and output not equal."); + } + + // allocate memory for the binding + for (int i = 0 ; i < _field_num ; i++ ) { + DataBinding* column_data = new DataBinding; + column_data->target_type = SQL_C_CHAR; + auto type = _tuple_desc->slots()[i]->type().type; + column_data->buffer_length = (type == TYPE_HLL || type == TYPE_CHAR || type == TYPE_VARCHAR) ? BIG_COLUMN_SIZE_BUFFER : + SMALL_COLUMN_SIZE_BUFFER; + column_data->target_value_ptr = malloc(sizeof(char) * column_data->buffer_length); + _columns_data.push_back(column_data); + } + + // setup the binding + for (int i = 0 ; i < _field_num ; i++ ) { + ODBC_DISPOSE(_stmt, SQL_HANDLE_STMT, SQLBindCol(_stmt, (SQLUSMALLINT)i + 1, _columns_data[i].target_type, + _columns_data[i].target_value_ptr, _columns_data[i].buffer_length, &(_columns_data[i].strlen_or_ind)), "bind col"); + } + + return Status::OK(); +} + +Status ODBCScanner::query(const std::string& table, const std::vector& fields, + const std::vector& filters) { + if (!_is_open) { + return Status::InternalError("Query before open."); + } + + _sql_str = "SELECT "; + + for (int i = 0; i < fields.size(); ++i) { + if (0 != i) { + _sql_str += ","; + } + + _sql_str += fields[i]; + } + + _sql_str += " FROM " + table; + + if (!filters.empty()) { + _sql_str += " WHERE "; + + for (int i = 0; i < filters.size(); ++i) { + if (0 != i) { + _sql_str += " AND"; + } + + _sql_str += " (" + filters[i] + ") "; + } + } + + return query(_sql_str); +} + +Status ODBCScanner::get_next_row(bool* eos) { + if (!_is_open) { + return Status::InternalError("GetNextRow before open."); + } + + auto ret = SQLFetch(_stmt); + if (ret == SQL_SUCCESS || ret == SQL_SUCCESS_WITH_INFO) { + return Status::OK(); + } else if (ret != SQL_NO_DATA_FOUND) { + return error_status("result fetch", handle_diagnostic_record(_stmt, SQL_HANDLE_STMT, ret)); + } + + *eos = true; + return Status::OK(); +} + +Status ODBCScanner::error_status(const std::string& prefix, const std::string& error_msg) { + std::stringstream msg; + msg << prefix << " Error: " << error_msg; + LOG(WARNING) << msg.str(); + return Status::InternalError(msg.str()); +} + +// handle_diagnostic_record : use SQLGetDiagRec to get the display error/warning information +// +// Parameters: +// hHandle ODBC handle +// hType Type of handle (HANDLE_STMT, HANDLE_ENV, HANDLE_DBC) +// RetCode Return code of failing command +std::string ODBCScanner::handle_diagnostic_record(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode) { + SQLSMALLINT rec = 0; + SQLINTEGER error; + CHAR message[1000]; + CHAR state[SQL_SQLSTATE_SIZE+1]; + + if (RetCode == SQL_INVALID_HANDLE) { + return "Invalid handle!"; + } + + std::string diagnostic_msg; + + while (SQLGetDiagRec(hType, + hHandle, + ++rec, + (SQLCHAR *)(state), + &error, + reinterpret_cast(message), + (SQLSMALLINT)(sizeof(message) / sizeof(WCHAR)), + (SQLSMALLINT *)NULL) == SQL_SUCCESS) { + // Hide data truncated.. + if (wcsncmp(reinterpret_cast(state), L"01004", 5)) { + boost::format msg_string("%s %s (%d)"); + msg_string % state % message % error; + diagnostic_msg += msg_string.str(); + } + } + + return diagnostic_msg; +} + +std::string ODBCScanner::build_connect_string(const ODBCScannerParam& param) { + // different database have different connection string + // oracle connect string + if (param.type == TOdbcTableType::ORACLE) { + boost::format connect_string("Driver=%s;Dbq=//%s:%s/%s;DataBase=%s;Uid=%s;Pwd=%s;charset=%s"); + connect_string % param.drivier % param.host % param.port % param.db % param.db % param.user % param.passwd % + param.charest; + + return connect_string.str(); + } else if (param.type == TOdbcTableType::MYSQL) { + boost::format connect_string("Driver=%s;Server=%s;Port=%s;DataBase=%s;Uid=%s;Pwd=%s;charset=%s"); + connect_string % param.drivier % param.host % param.port % param.db % param.user % param.passwd % + param.charest; + return connect_string.str(); + } + + return ""; +} + +} diff --git a/be/src/exec/odbc_scanner.h b/be/src/exec/odbc_scanner.h new file mode 100644 index 00000000000000..50f7d84b337950 --- /dev/null +++ b/be/src/exec/odbc_scanner.h @@ -0,0 +1,109 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_QUERY_EXEC_ODBC_SCANNER_H +#define DORIS_BE_SRC_QUERY_EXEC_ODBC_SCANNER_H + +#include +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "gen_cpp/Types_types.h" +#include "runtime/descriptors.h" + +namespace doris { + +struct ODBCScannerParam { + std::string host; + std::string port; + std::string user; + std::string passwd; + std::string db; + std::string drivier; + std::string charest = "utf8"; + + TOdbcTableType::type type; + const TupleDescriptor* tuple_desc; +}; + +// Because the DataBinding have the mem alloc, so +// this class should not be copyable +struct DataBinding : public boost::noncopyable { + SQLSMALLINT target_type; + SQLINTEGER buffer_length; + SQLLEN strlen_or_ind; + SQLPOINTER target_value_ptr; + + DataBinding() = default; + + ~DataBinding() { + free(target_value_ptr); + } +}; + +// ODBC Scanner for scan data from ODBC +class ODBCScanner { +public: + ODBCScanner(const ODBCScannerParam& param); + ~ODBCScanner(); + + Status open(); + + Status query(const std::string& query); + + // query for DORIS + Status query(const std::string& table, const std::vector& fields, + const std::vector& filters); + + Status get_next_row(bool* eos); + + const DataBinding& get_column_data(int i) const { + return _columns_data.at(i); + } + +private: + static std::string build_connect_string(const ODBCScannerParam& param); + + static Status error_status(const std::string& prefix, const std::string& error_msg); + + static std::string handle_diagnostic_record (SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + std::string _connect_string; + std::string _sql_str; + TOdbcTableType::type _type; + const TupleDescriptor* _tuple_desc; + + bool _is_open; + SQLSMALLINT _field_num; + uint64_t _row_count; + + SQLHENV _env; + SQLHDBC _dbc; + SQLHSTMT _stmt; + + boost::ptr_vector _columns_data; +}; + +} + +#endif \ No newline at end of file diff --git a/be/src/runtime/descriptors.cpp b/be/src/runtime/descriptors.cpp index b2b3a9105759cc..855a434a7a4842 100644 --- a/be/src/runtime/descriptors.cpp +++ b/be/src/runtime/descriptors.cpp @@ -174,6 +174,26 @@ std::string MySQLTableDescriptor::debug_string() const { return out.str(); } +ODBCTableDescriptor::ODBCTableDescriptor(const TTableDescriptor& tdesc) + : TableDescriptor(tdesc), + _db(tdesc.odbcTable.db), + _table(tdesc.odbcTable.table), + _host(tdesc.odbcTable.host), + _port(tdesc.odbcTable.port), + _user(tdesc.odbcTable.user), + _passwd(tdesc.odbcTable.passwd), + _driver(tdesc.odbcTable.driver), + _type(tdesc.odbcTable.type){ +} + +std::string ODBCTableDescriptor::debug_string() const { + std::stringstream out; + out << "ODBCTable(" << TableDescriptor::debug_string() << " _db" << _db << " table=" << + _table << " host=" << _host << " port=" << _port << " user=" << _user << " passwd=" << _passwd + << " driver=" << _driver << " type" << _type; + return out.str(); +} + TupleDescriptor::TupleDescriptor(const TTupleDescriptor& tdesc) : _id(tdesc.id), _table_desc(NULL), @@ -473,6 +493,10 @@ Status DescriptorTbl::create(ObjectPool* pool, const TDescriptorTable& thrift_tb desc = pool->add(new MySQLTableDescriptor(tdesc)); break; + case TTableType::ODBC_TABLE: + desc = pool->add(new ODBCTableDescriptor(tdesc)); + break; + case TTableType::OLAP_TABLE: desc = pool->add(new OlapTableDescriptor(tdesc)); break; diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index 2665dd2af8fc73..195c0cda4516bc 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -251,6 +251,45 @@ class MySQLTableDescriptor : public TableDescriptor { std::string _passwd; }; +class ODBCTableDescriptor : public TableDescriptor { +public: + ODBCTableDescriptor(const TTableDescriptor& tdesc); + virtual std::string debug_string() const; + const std::string db() const { + return _db; + } + const std::string table() const { + return _table; + } + const std::string host() const { + return _host; + } + const std::string port() const { + return _port; + } + const std::string user() const { + return _user; + } + const std::string passwd() const { + return _passwd; + } + const std::string driver() const { + return _driver; + } + const TOdbcTableType::type type() const { + return _type; + } +private: + std::string _db; + std::string _table; + std::string _host; + std::string _port; + std::string _user; + std::string _passwd; + std::string _driver; + TOdbcTableType::type _type; +}; + class TupleDescriptor { public: // virtual ~TupleDescriptor() {} diff --git a/bin/start_be.sh b/bin/start_be.sh index c412199fed63a7..50fe2bb0cbfd14 100755 --- a/bin/start_be.sh +++ b/bin/start_be.sh @@ -47,6 +47,12 @@ export UDF_RUNTIME_DIR=${DORIS_HOME}/lib/udf-runtime export LOG_DIR=${DORIS_HOME}/log export PID_DIR=`cd "$curdir"; pwd` +# set odbc conf path +export ODBCSYSINI=$DORIS_HOME/conf + +# support utf8 for oracle database +export NLS_LANG=AMERICAN_AMERICA.AL32UTF8 + while read line; do envline=`echo $line | sed 's/[[:blank:]]*=[[:blank:]]*/=/g' | sed 's/^[[:blank:]]*//g' | egrep "^[[:upper:]]([[:upper:]]|_|[[:digit:]])*="` envline=`eval "echo $envline"` diff --git a/conf/odbcinst.ini b/conf/odbcinst.ini new file mode 100644 index 00000000000000..89500e9d3f1b64 --- /dev/null +++ b/conf/odbcinst.ini @@ -0,0 +1,26 @@ +# Example driver definitions, you should not use the example odbc driver +# before you prepare env in your server + +# Driver from the postgresql-odbc package +# Setup from the unixODBC package +[PostgreSQL] +Description = ODBC for PostgreSQL +Driver = /usr/lib/psqlodbc.so +Setup = /usr/lib/libodbcpsqlS.so +FileUsage = 1 + + +# Driver from the mysql-connector-odbc package +# Setup from the unixODBC package +[MySQL ODBC 8.0 Unicode Driver] +Description = ODBC for MySQL +Driver = /usr/lib64/libmyodbc8w.so +FileUsage = 1 + +# Driver from the oracle-connector-odbc package +# Setup from the unixODBC package +[Oracle 19 ODBC driver] +Description=Oracle ODBC driver for Oracle 19 +Driver=/usr/lib/libsqora.so.19.1 + + diff --git a/docs/en/extending-doris/odbc-of-doris.md b/docs/en/extending-doris/odbc-of-doris.md new file mode 100644 index 00000000000000..7740b4f5ede688 --- /dev/null +++ b/docs/en/extending-doris/odbc-of-doris.md @@ -0,0 +1,229 @@ +--- +{ + "title": "ODBC of Doris", + "language": "en" +} +--- + + + + +# ODBC External Table Of Doris + +ODBC external table of Doris provides Doris access to external tables through the standard interface for database access (ODBC). The external table eliminates the tedious data import work and enables Doris to have the ability to access all kinds of databases. It solves the data analysis problem of external tables with Doris' OLAP capability. + +1. Support various data sources to access Doris +2. Support Doris query with tables in various data sources to perform more complex analysis operations + + +This document mainly introduces the implementation principle and usage of this ODBC external table. + +## Glossary + +### Noun in Doris + +* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. +* BE: Backend, Doris's back-end node. Responsible for query execution and data storage. + +## How To Use + +### Create ODBC External Table + +#### 1. Creating ODBC external table without resource + +``` +CREATE EXTERNAL TABLE `baseall_oracle` ( + `k1` decimal(9, 3) NOT NULL COMMENT "", + `k2` char(10) NOT NULL COMMENT "", + `k3` datetime NOT NULL COMMENT "", + `k5` varchar(20) NOT NULL COMMENT "", + `k6` double NOT NULL COMMENT "" +) ENGINE=ODBC +COMMENT "ODBC" +PROPERTIES ( +"host" = "192.168.0.1", +"port" = "8086", +"user" = "test", +"password" = "test", +"database" = "test", +"table" = "baseall", +"driver" = "Oracle 19 ODBC driver", +"type" = "oracle" +); +``` + +#### 2. Creating ODBC external table by resource (recommended) +``` +CREATE EXTERNAL RESOURCE `oracle_odbc` +PROPERTIES ( +"type" = "odbc_catalog", +"host" = "192.168.0.1", +"port" = "8086", +"user" = "test", +"password" = "test", +"database" = "test", +"odbc_type" = "oracle", +"driver" = "Oracle 19 ODBC driver" +); + +CREATE EXTERNAL TABLE `baseall_oracle` ( + `k1` decimal(9, 3) NOT NULL COMMENT "", + `k2` char(10) NOT NULL COMMENT "", + `k3` datetime NOT NULL COMMENT "", + `k5` varchar(20) NOT NULL COMMENT "", + `k6` double NOT NULL COMMENT "" +) ENGINE=ODBC +COMMENT "ODBC" +PROPERTIES ( +"odbc_catalog_resource" = "oracle_odbc", +"database" = "test", +"table" = "baseall" +); +``` + +The following parameters are accepted by ODBC external table:: + +Parameter | Description +---|--- +**hosts** | IP address of external database +**driver** | The driver name of ODBC Driver, which needs to be/conf/odbcinst.ini. The driver names should be consistent. +**type** | The type of external database, currently supports Oracle and MySQL +**user** | The user name of database +**password** | password for the user + + +##### Installation and configuration of ODBC driver + + +Each database will provide ODBC access driver. Users can install the corresponding ODBC driver lib library according to the official recommendation of each database. + +After installation of ODBC driver, find the path of the driver lib Library of the corresponding database. The modify be/conf/odbcinst.ini Configuration like: + +``` +[MySQL Driver] +Description = ODBC for MySQL +Driver = /usr/lib64/libmyodbc8w.so +FileUsage = 1 +``` +* `[]`:The corresponding driver name in is the driver name. When creating an external table, the driver name of the external table should be consistent with that in the configuration file. +* `Driver=`: This should be setted in according to the actual be installation path of the driver. It is essentially the path of a dynamic library. Here, we need to ensure that the pre dependencies of the dynamic library are met. + +**Remember, all BE nodes are required to have the same driver installed, the same installation path and the same be/conf/odbcinst.ini config.** + + +### Query usage + +After the ODBC external table is built in Doris, it is no different from ordinary Doris tables except that the data model (rollup, pre aggregation, materialized view, etc.) in Doris cannot be used. + +``` +select * from oracle_table where k1 > 1000 and k3 ='term' or k4 like '%doris' +``` + + + +## Data type mapping + +There are different data types among different database. Here, the types in each database and the data type matching in Doris are listed. + +### MySQL Type + +| MySQL | Doris | Alternation rules | +| :------: | :----: | :-------------------------------: | +| BOOLEAN | BOOLEAN | | +| CHAR | CHAR | Only utf8 encoding is supported | +| VARCHAR | VARCHAR | Only utf8 encoding is supported | +| DATE | DATE | | +| FLOAT | FLOAT | | +| TINYINT | TINYINT | | +| SMALLINT | SMALLINT | | +| INT | INT | | +| BIGINT | BIGINT | | +| DOUBLE | DOUBLE | | +| DATE | DATE | | +| DATETIME | DATETIME | | +| DECIMAL | DECIMAL | | + +### Oracle Type + +| Oracle | Doris | Alternation rules | +| :------: | :----: | :-------------------------------: | +| not support | BOOLEAN | Oracle can replace Boolean with number (1) | +| CHAR | CHAR | | +| VARCHAR | VARCHAR | | +| DATE | DATE | | +| FLOAT | FLOAT | | +| not support | TINYINT | Oracle can be replaced by NUMBER | +| SMALLINT | SMALLINT | | +| INT | INT | | +| not support | BIGINT | Oracle can be replaced by NUMBER | +| not support | DOUBLE | Oracle can be replaced by NUMBER | +| DATE | DATE | | +| DATETIME | DATETIME | | +| NUMBER | DECIMAL | | + +## Q&A + +1. Relationship with the original external table of MySQL + +After accessing the ODBC external table, the original way to access the MySQL external table will be gradually abandoned. If you have not used the MySQL external table before, it is recommended that the newly accessed MySQL tables use ODBC external table directly. + +2. Besides MySQL and Oracle, can doris support more databases + +Currently, Doris only adapts to MySQL and Oracle. The adaptation of other databases is under planning. In principle, any database that supports ODBC access can be accessed through the ODBC external table. If you need to access other database, you are welcome to modify the code and contribute to Doris. + +3. When is it appropriate to use ODBC external tables. + + Generally, when the amount of external data is small and less than 100W. It can be accessed through ODBC external table. Since external table the can not play the role of Doris in the storage engine and will bring additional network overhead. it is recommended to determine whether to access through external tables or import data into Doris according to the actual access delay requirements for queries. + +4. Garbled code in Oracle access + + Add the following parameters to the BE start up script:`export NLS_LANG=AMERICAN_AMERICA.AL32UTF8`, Restart all be + +5. ANSI Driver or Unicode Driver ? + + Currently, ODBC supports both ANSI and Unicode driver forms, while Doris only supports Unicode driver. If you force the use of ANSI driver, the query results may be wrong. + +6. Report Errors: `driver connect Err: 01000 [unixODBC][Driver Manager]Can't open lib 'Xxx' : file not found (0)` + + The driver for the corresponding data is not installed on each BE, or it is not installed in the be/conf/odbcinst.ini configure the correct path, or create the table with the driver namebe/conf/odbcinst.ini different + +7. Report Errors: `fail to convert odbc value 'PALO ' TO INT` + + Type conversion error, type mapping of column needs to be modified + +8. BE crash occurs when using old MySQL table and ODBC external driver at the same time + + +This is the compatibility problem between MySQL database ODBC driver and existing Doris depending on MySQL lib. The recommended solutions are as follows: + +* Method 1: replace the old MySQL External Table by ODBC External Table, recompile BE close options **WITH_MySQL** + +* Method 2: Do not use the latest 8. X MySQL ODBC driver replace with the 5. X MySQL ODBC driver + +9. Push down the filtering condition + + The current ODBC appearance supports push down under filtering conditions。MySQL external table can support push down under all conditions. The functions of other databases are different from Doris, which will cause the push down query to fail. At present, except for the MySQL, other databases do not support push down of function calls. Whether Doris pushes down the required filter conditions can be confirmed by the 'explain' query statement. + +10. Report Errors: `driver connect Err: xxx` + + Connection to the database fails. The` Err: part` represents the error of different database connection failures. This is usually a configuration problem. You should check whether the IP address, port or account password are mismatched. + + + diff --git a/docs/zh-CN/extending-doris/odbc-of-doris.md b/docs/zh-CN/extending-doris/odbc-of-doris.md new file mode 100644 index 00000000000000..32c1e34cbae415 --- /dev/null +++ b/docs/zh-CN/extending-doris/odbc-of-doris.md @@ -0,0 +1,217 @@ +--- +{ + "title": "ODBC of Doris", + "language": "zh-CN" +} +--- + + + +# ODBC External Table Of Doris + +ODBC External Table Of Doris 提供了Doris通过数据库访问的标准接口(ODBC)来访问外部表,外部表省去了繁琐的数据导入工作,让Doris可以具有了访问各式数据库的能力,并借助Doris本身的OLAP的能力来解决外部表的数据分析问题: + + 1. 支持各种数据源接入Doris + 2. 支持Doris与各种数据源中的表联合查询,进行更加复杂的分析操作 + +本文档主要介绍该功能的实现原理、使用方式等。 + +## 名词解释 + +### Doirs相关 +* FE:Frontend,Doris 的前端节点,负责元数据管理和请求接入 +* BE:Backend,Doris 的后端节点,负责查询执行和数据存储 + +## 使用方法 + +### Doris中创建ODBC的外表 + +#### 1. 不使用Resource创建ODBC的外表 + +``` +CREATE EXTERNAL TABLE `baseall_oracle` ( + `k1` decimal(9, 3) NOT NULL COMMENT "", + `k2` char(10) NOT NULL COMMENT "", + `k3` datetime NOT NULL COMMENT "", + `k5` varchar(20) NOT NULL COMMENT "", + `k6` double NOT NULL COMMENT "" +) ENGINE=ODBC +COMMENT "ODBC" +PROPERTIES ( +"host" = "192.168.0.1", +"port" = "8086", +"user" = "test", +"password" = "test", +"database" = "test", +"table" = "baseall", +"driver" = "Oracle 19 ODBC driver", +"odbc_type" = "oracle" +); +``` + +#### 2. 通过ODBC_Resource来创建ODBC外表 (推荐使用的方式) +``` +CREATE EXTERNAL RESOURCE `oracle_odbc` +PROPERTIES ( +"type" = "odbc_catalog", +"host" = "192.168.0.1", +"port" = "8086", +"user" = "test", +"password" = "test", +"database" = "test", +"odbc_type" = "oracle", +"driver" = "Oracle 19 ODBC driver" +); + +CREATE EXTERNAL TABLE `baseall_oracle` ( + `k1` decimal(9, 3) NOT NULL COMMENT "", + `k2` char(10) NOT NULL COMMENT "", + `k3` datetime NOT NULL COMMENT "", + `k5` varchar(20) NOT NULL COMMENT "", + `k6` double NOT NULL COMMENT "" +) ENGINE=ODBC +COMMENT "ODBC" +PROPERTIES ( +"odbc_catalog_resource" = "oracle_odbc", +"database" = "test", +"table" = "baseall" +); +``` +参数说明: + +参数 | 说明 +---|--- +**hosts** | 外表数据库的IP地址 +**driver** | ODBC外表的Driver名,该名字需要和be/conf/odbcinst.ini中的Driver名一致。 +**odbc_type** | 外表数据库的类型,当前支持oracle与mysql +**user** | 外表数据库的用户名 +**password** | 对应用户的密码信息 + + + +##### ODBC Driver的安装和配置 + +各大主流数据库都会提供ODBC的访问Driver,用户可以执行参照参照各数据库官方推荐的方式安装对应的ODBC Driver LiB库。 + + +安装完成之后,查找对应的数据库的Driver Lib库的路径,并且修改be/conf/odbcinst.ini的配置: +``` +[MySQL Driver] +Description = ODBC for MySQL +Driver = /usr/lib64/libmyodbc8w.so +FileUsage = 1 +``` +* 上述配置`[]`里的对应的是Driver名,在建立外部表时需要保持外部表的Driver名和配置文件之中的一致。 +* `Driver=` 这个要根据实际BE安装Driver的路径来填写,本质上就是一个动态库的路径,这里需要保证该动态库的前置依赖都被满足。 + +**切记,这里要求所有的BE节点都安装上相同的Driver,并且安装路径相同,同时有相同的be/conf/odbcinst.ini的配置。** + + +### 查询用法 + +完成在Doris中建立ODBC外表后,除了无法使用Doris中的数据模型(rollup、预聚合、物化视图等)外,与普通的Doris表并无区别 + + +``` +select * from oracle_table where k1 > 1000 and k3 ='term' or k4 like '%doris' +``` + + + +## 类型匹配 + +各个数据之间数据类型存在不同,这里列出了各个数据库中的类型和Doris之中数据类型匹配的情况。 + +### MySQL类型 + +| MySQL | Doris | 替换方案 | +| :------: | :----: | :-------------------------------: | +| BOOLEAN | BOOLEAN | | +| CHAR | CHAR | 当前仅支持UTF8编码 | +| VARCHAR | VARCHAR | 当前仅支持UTF8编码 | +| DATE | DATE | | +| FLOAT | FLOAT | | +| TINYINT | TINYINT | | +| SMALLINT | SMALLINT | | +| INT | INT | | +| BIGINT | BIGINT | | +| DOUBLE | DOUBLE | | +| DATETIME | DATETIME | | +| DECIMAL | DECIMAL | | + +### Oracle类型 + +| Oracle | Doris | 替换方案 | +| :------: | :----: | :-------------------------------: | +| 不支持 | BOOLEAN | Oracle可用number(1) 替换boolean | +| CHAR | CHAR | | +| VARCHAR | VARCHAR | | +| DATE | DATE | | +| FLOAT | FLOAT | | +| 无 | TINYINT | Oracle可由NUMMBER替换 | +| SMALLINT | SMALLINT | | +| INT | INT | | +| 无 | BIGINT | Oracle可由NUMMBER替换 | +| 无 | DOUBLE | Oracle可由NUMMBER替换 | +| DATETIME | DATETIME | | +| NUMBER | DECIMAL | | + +## Q&A + +1. 与原先的MySQL外表的关系 + + 在接入ODBC外表之后,原先的访问MySQL外表的方式将被逐渐弃用。如果之前没有使用过MySQL外表,建议新接入的MySQL表直接使用ODBC的MySQL外表。 + +2. 除了MySQL和Oracle,是否能够支持更多的数据库 + + 目前Doris只适配了MySQL和Oracle,关于其他的数据库的适配工作正在规划之中,原则上来说任何支持ODBC访问的数据库都能通过ODBC外表来访问。如果您有访问其他外表的需求,欢迎修改代码并贡献给Doris。 + +3. 什么场合适合通过外表访问 + + 通常在外表数据量较小,少于100W条时,可以通过外部表的方式访问。由于外表无法发挥Doris在存储引擎部分的能力和会带来额外的网络开销,所以建议根据实际对查询的访问时延要求来确定是否通过外部表访问还是将数据导入Doris之中。 + +4. 通过Oracle访问出现乱码 + + 尝试在BE启动脚本之中添加如下参数:`export NLS_LANG=AMERICAN_AMERICA.AL32UTF8`, 并重新启动所有BE + +5. ANSI Driver or Unicode Driver ? + + 当前ODBC支持ANSI 与 Unicode 两种Driver形式,当前Doris只支持Unicode Driver。如果强行使用ANSI Driver可能会导致查询结果出错。 + +6. 报错 `driver connect Err: 01000 [unixODBC][Driver Manager]Can't open lib 'Xxx' : file not found (0)` + + 没有在每一个BE上安装好对应数据的Driver,或者是没有在be/conf/odbcinst.ini配置正确的路径,亦或是建表是Driver名与be/conf/odbcinst.ini不同 + +7. 报错 `fail to convert odbc value 'PALO ' TO INT` + + 类型转换出错,需要修改列的类型映射 + +8. 同时使用旧的MySQL表与ODBC外表的Driver时出现程序Crash + + 这个是MySQL数据库的Driver与现有Doris依赖MySQL外表的兼容问题。推荐解决的方式如下: + * 方式1:通过ODBC外表替换旧的MySQL外表,并重新编译BE,关闭WITH_MYSQL的选项 + * 方式2:不使用最新8.X的MySQL的ODBC Driver,而是使用5.X的MySQL的ODBC Driver + +9. 过滤条件下推 + 当前ODBC外表支持过滤条件下推,目前MySQL的外表是能够支持所有条件下推的。其他的数据库的函数与Doris不同会导致下推查询失败。目前除MySQL外表之外,其他的数据库不支持函数调用的条件下推。Doris是否将所需过滤条件下推,可以通过`explain` 查询语句进行确认。 + +10. 报错`driver connect Err: xxx` + + 通常是连接数据库失败,Err部分代表了不同的数据库连接失败的报错。这种情况通常是配置存在问题。可以检查是否错配了ip地址,端口或账号密码。 \ No newline at end of file diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index d4f9cf758523de..89f7dbbcdd763b 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -270,7 +270,7 @@ message TabletSchemaPB { optional uint32 next_column_unique_id = 7; // OLAPHeaderMessage.next_column_unique_id optional bool is_in_memory = 8 [default=false]; optional int32 delete_sign_idx = 9 [default = -1]; - + optional int32 sequence_col_idx = 10 [default= -1]; } enum TabletStatePB { @@ -308,6 +308,7 @@ message TabletMetaPB { optional int64 end_rowset_id = 15; optional RowsetTypePB preferred_rowset_type = 16; optional TabletTypePB tablet_type = 17; + repeated RowsetMetaPB stale_rs_metas = 18; } message OLAPIndexHeaderMessage { diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index b960ba6afbbba0..bd44884a32092c 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -478,6 +478,7 @@ [['if'], 'DATE', ['BOOLEAN', 'DATE', 'DATE'], ''], [['if'], 'DECIMAL', ['BOOLEAN', 'DECIMAL', 'DECIMAL'], ''], [['if'], 'DECIMALV2', ['BOOLEAN', 'DECIMALV2', 'DECIMALV2'], ''], + [['if'], 'BITMAP', ['BOOLEAN', 'BITMAP', 'BITMAP'], ''], # The priority of varchar should be lower than decimal in IS_SUPERTYPE_OF mode. [['if'], 'VARCHAR', ['BOOLEAN', 'VARCHAR', 'VARCHAR'], ''], @@ -510,6 +511,7 @@ [['ifnull'], 'DATETIME', ['DATETIME', 'DATE'], ''], [['ifnull'], 'DECIMAL', ['DECIMAL', 'DECIMAL'], ''], [['ifnull'], 'DECIMALV2', ['DECIMALV2', 'DECIMALV2'], ''], + [['ifnull'], 'BITMAP', ['BITMAP', 'BITMAP'], ''], # The priority of varchar should be lower than decimal in IS_SUPERTYPE_OF mode. [['ifnull'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], ''], @@ -525,6 +527,7 @@ [['coalesce'], 'DATE', ['DATE', '...'], ''], [['coalesce'], 'DECIMAL', ['DECIMAL', '...'], ''], [['coalesce'], 'DECIMALV2', ['DECIMALV2', '...'], ''], + [['coalesce'], 'BITMAP', ['BITMAP', '...'], ''], # The priority of varchar should be lower than decimal in IS_SUPERTYPE_OF mode. [['coalesce'], 'VARCHAR', ['VARCHAR', '...'], ''], diff --git a/gensrc/thrift/AgentService.thrift b/gensrc/thrift/AgentService.thrift index f4184ab74dca85..bcbc57698ef89f 100644 --- a/gensrc/thrift/AgentService.thrift +++ b/gensrc/thrift/AgentService.thrift @@ -47,6 +47,7 @@ struct TTabletSchema { 7: optional list indexes 8: optional bool is_in_memory 9: optional i32 delete_sign_idx = -1 + 10: optional i32 sequence_col_idx = -1 } // this enum stands for different storage format in src_backends diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift index 6de605b2a74aab..3aec2f70021609 100644 --- a/gensrc/thrift/FrontendService.thrift +++ b/gensrc/thrift/FrontendService.thrift @@ -554,6 +554,7 @@ struct TStreamLoadPutRequest { 26: optional string json_root 27: optional Types.TMergeType merge_type 28: optional string delete_condition + 29: optional string sequence_col } struct TStreamLoadPutResult { diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index 0fa426ae818313..f0af3a293ed1da 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -26,7 +26,6 @@ include "Partitions.thrift" enum TPlanNodeType { OLAP_SCAN_NODE, MYSQL_SCAN_NODE, - ODBC_SCAN_NODE, CSV_SCAN_NODE, SCHEMA_SCAN_NODE, HASH_JOIN_NODE, @@ -50,7 +49,8 @@ enum TPlanNodeType { REPEAT_NODE, ASSERT_NUM_ROWS_NODE, INTERSECT_NODE, - EXCEPT_NODE + EXCEPT_NODE, + ODBC_SCAN_NODE } // phases of an execution node diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift index aa64c0e54f9d73..194cf30bdeec12 100644 --- a/gensrc/thrift/Types.thrift +++ b/gensrc/thrift/Types.thrift @@ -318,12 +318,12 @@ enum TEtlState { enum TTableType { MYSQL_TABLE, // Deprecated - ODBC_TABLE, OLAP_TABLE, SCHEMA_TABLE, KUDU_TABLE, // Deprecated BROKER_TABLE, - ES_TABLE + ES_TABLE, + ODBC_TABLE } enum TOdbcTableType {